001/***************************************************************************** 002 * Copyright by The HDF Group. * 003 * Copyright by the Board of Trustees of the University of Illinois. * 004 * All rights reserved. * 005 * * 006 * This file is part of the HDF Java Products distribution. * 007 * The full copyright notice, including terms governing use, modification, * 008 * and redistribution, is contained in the files COPYING and Copyright.html. * 009 * COPYING can be found at the root of the source code distribution tree. * 010 * Or, see http://hdfgroup.org/products/hdf-java/doc/Copyright.html. * 011 * If you do not have access to either file, you may request a copy from * 012 * help@hdfgroup.org. * 013 ****************************************************************************/ 014 015package hdf.object; 016 017import java.lang.reflect.Array; 018import java.util.Vector; 019 020/** 021 * The abstract class provides general APIs to create and manipulate dataset 022 * objects, and retrieve dataset properties datatype and dimension sizes. 023 * <p> 024 * This class provides two convenient functions, read()/write(), to read/write 025 * data values. Reading/writing data may take many library calls if we use the 026 * library APIs directly. The read() and write functions hide all the details of 027 * these calls from users. 028 * 029 * @see hdf.object.ScalarDS 030 * @see hdf.object.CompoundDS 031 * 032 * @version 1.1 9/4/2007 033 * @author Peter X. Cao 034 */ 035public abstract class Dataset extends HObject { 036 /** 037 * 038 */ 039 private static final long serialVersionUID = -3360885430038261178L; 040 041 private final static org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class); 042 043 /** 044 * The memory buffer that holds the raw data of the dataset. 045 */ 046 protected Object data; 047 048 /** 049 * The number of dimensions of the dataset. 
050 */ 051 protected int rank; 052 053 /** 054 * The current dimension sizes of the dataset 055 */ 056 protected long[] dims; 057 058 /** 059 * The max dimension sizes of the dataset 060 */ 061 protected long[] maxDims; 062 063 /** 064 * Array that contains the number of data points selected (for read/write) 065 * in each dimension. 066 * <p> 067 * The select size must be less than or equal to the current dimension size. 068 * A subset of a rectangle selection is defined by the starting position and 069 * selected sizes. 070 * <p> 071 * For example, a 4 X 5 dataset 072 * 073 * <pre> 074 * 0, 1, 2, 3, 4 075 * 10, 11, 12, 13, 14 076 * 20, 21, 22, 23, 24 077 * 30, 31, 32, 33, 34 078 * long[] dims = {4, 5}; 079 * long[] startDims = {1, 2}; 080 * long[] selectedDims = {3, 3}; 081 * then the following subset is selected by the startDims and selectedDims above 082 * 12, 13, 14 083 * 22, 23, 24 084 * 32, 33, 34 085 * </pre> 086 */ 087 protected long[] selectedDims; 088 089 /** 090 * The starting position of each dimension of a selected subset. With both 091 * the starting position and selected sizes, the subset of a rectangle 092 * selection is fully defined. 093 */ 094 protected long[] startDims; 095 096 /** 097 * Array that contains the indices of the dimensions selected for display. 098 * <p> 099 * <B>selectedIndex[] is provided for two purpose:</B> 100 * <OL> 101 * <LI> 102 * selectedIndex[] is used to indicate the order of dimensions for display, 103 * i.e. selectedIndex[0] = row, selectedIndex[1] = column and 104 * selectedIndex[2] = depth. For example, for a four dimension dataset, if 105 * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index, 106 * dim[2] is selected as column index and dim[3] is selected as depth index. 107 * <LI> 108 * selectedIndex[] is also used to select dimensions for display for 109 * datasets with three or more dimensions. 
We assume that applications such 110 * as HDFView can only display data up to three dimensions (a 2D 111 * spreadsheet/image with a third dimension that the 2D spreadsheet/image is 112 * cut from). For dataset with more than three dimensions, we need 113 * selectedIndex[] to store which three dimensions are chosen for display. 114 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 115 * then dim[1] is selected as row index, dim[2] is selected as column index 116 * and dim[3] is selected as depth index. dim[0] is not selected. Its 117 * location is fixed at 0 by default. 118 * </OL> 119 */ 120 protected final int[] selectedIndex; 121 122 /** 123 * The number of elements to move from the start location in each dimension. 124 * For example, if selectedStride[0] = 2, every other data point is selected 125 * along dim[0]. 126 */ 127 protected long[] selectedStride; 128 129 /** 130 * The array of dimension sizes for a chunk. 131 */ 132 protected long[] chunkSize; 133 134 /** The compression information. */ 135 protected String compression; 136 public final static String compression_gzip_txt = "GZIP: level = "; 137 138 /** The filters information. */ 139 protected String filters; 140 141 /** The storage information. */ 142 protected String storage; 143 144 /** The datatype object of the dataset. */ 145 protected Datatype datatype; 146 147 /** 148 * Array of strings that represent the dimension names. It is null if 149 * dimension names do not exist. 150 */ 151 protected String[] dimNames; 152 153 /** Flag to indicate if the byte[] array is converted to strings */ 154 protected boolean convertByteToString = true; 155 156 /** Flag to indicate if data values are loaded into memory. */ 157 protected boolean isDataLoaded = false; 158 159 /** The number of data points in the memory buffer. */ 160 protected long nPoints = 1; 161 162 /** 163 * The data buffer that contains the raw data directly reading from file 164 * (before any data conversion). 
165 */ 166 protected Object originalBuf = null; 167 168 /** 169 * The array that holds the converted data of unsigned C-type integers. 170 * <p> 171 * For example, Suppose that the original data is an array of unsigned 172 * 16-bit short integers. Since Java does not support unsigned integer, the 173 * data is converted to an array of 32-bit singed integer. In that case, the 174 * converted buffer is the array of 32-bit singed integer. 175 */ 176 protected Object convertedBuf = null; 177 178 /** 179 * Flag to indicate if the enum data is converted to strings. 180 */ 181 protected boolean enumConverted = false; 182 183 /** 184 * Constructs a Dataset object with a given file, name and path. 185 * 186 * @param theFile 187 * the file that contains the dataset. 188 * @param name 189 * the name of the Dataset, e.g. "dset1". 190 * @param path 191 * the full group path of this Dataset, e.g. "/arrays/". 192 */ 193 public Dataset(FileFormat theFile, String name, String path) { 194 this(theFile, name, path, null); 195 } 196 197 /** 198 * @deprecated Not for public use in the future. <br> 199 * Using {@link #Dataset(FileFormat, String, String)} 200 * 201 * @param theFile 202 * the file that contains the dataset. 203 * @param name 204 * the name of the Dataset, e.g. "dset1". 205 * @param path 206 * the full group path of this Dataset, e.g. "/arrays/". 207 * @param oid 208 * the oid of this Dataset. 
209 */ 210 @Deprecated 211 public Dataset(FileFormat theFile, String name, String path, long[] oid) { 212 super(theFile, name, path, oid); 213 214 rank = 0; 215 data = null; 216 dims = null; 217 maxDims = null; 218 selectedDims = null; 219 startDims = null; 220 selectedStride = null; 221 chunkSize = null; 222 compression = "NONE"; 223 filters = "NONE"; 224 storage = "NONE"; 225 dimNames = null; 226 227 selectedIndex = new int[3]; 228 selectedIndex[0] = 0; 229 selectedIndex[1] = 1; 230 selectedIndex[2] = 2; 231 } 232 233 /** 234 * Clears memory held by the dataset, such as data buffer. 235 */ 236 public void clear() { 237 if (data != null) { 238 if (data instanceof Vector) { 239 ((Vector) data).setSize(0); 240 } 241 data = null; 242 originalBuf = null; 243 convertedBuf = null; 244 } 245 isDataLoaded = false; 246 } 247 248 /** 249 * Retrieves datatype and dataspace information from file and sets the 250 * dataset in memory. 251 * <p> 252 * The init() is designed to support lazy operation in dataset object. When 253 * a data object is retrieved from file, the datatype, dataspace and raw 254 * data are not loaded into memory. When it is asked to read the raw data 255 * from file, init() is first called to get the datatype and dataspace 256 * information, then load the raw data from file. 257 * <p> 258 * init() is also used to reset selection of a dataset (start, stride and 259 * count) to the default, which is the entire dataset for 1D or 2D datasets. 260 * In the following example, init() at step 1) retrieve datatype and 261 * dataspace information from file. getData() at step 3) read only one data 262 * point. init() at step 4) reset the selection to the whole dataset. 263 * getData() at step 4) reads the values of whole dataset into memory. 
264 * 265 * <pre> 266 * dset = (Dataset) file.get(NAME_DATASET); 267 * 268 * // 1) get datatype and dataspace information from file 269 * dset.init(); 270 * rank = dset.getRank(); // rank = 2, a 2D dataset 271 * count = dset.getSelectedDims(); 272 * start = dset.getStartDims(); 273 * dims = dset.getDims(); 274 * 275 * // 2) select only one data point 276 * for (int i = 0; i < rank; i++) { 277 * start[0] = 0; 278 * count[i] = 1; 279 * } 280 * 281 * // 3) read one data point 282 * data = dset.getData(); 283 * 284 * // 4) reset to select the whole dataset 285 * dset.init(); 286 * 287 * // 5) clean the memory data buffer 288 * dset.clearData(); 289 * 290 * // 6) Read the whole dataset 291 * data = dset.getData(); 292 * </pre> 293 */ 294 public abstract void init(); 295 296 /** 297 * Returns the rank (number of dimensions) of the dataset. 298 * 299 * @return the number of dimensions of the dataset. 300 */ 301 public final int getRank() { 302 if (rank < 0) init(); 303 304 return rank; 305 } 306 307 /** 308 * Returns the array that contains the dimension sizes of the dataset. 309 * 310 * @return the dimension sizes of the dataset. 311 */ 312 public final long[] getDims() { 313 if (rank < 0) init(); 314 315 return dims; 316 } 317 318 /** 319 * Returns the array that contains the max dimension sizes of the dataset. 320 * 321 * @return the max dimension sizes of the dataset. 322 */ 323 public final long[] getMaxDims() { 324 if (rank < 0) init(); 325 326 if (maxDims == null) return dims; 327 328 return maxDims; 329 } 330 331 /** 332 * Returns the dimension sizes of the selected subset. 333 * <p> 334 * The SelectedDims is the number of data points of the selected subset. 335 * Applications can use this array to change the size of selected subset. 336 * 337 * The select size must be less than or equal to the current dimension size. 338 * Combined with the starting position, selected sizes and stride, the 339 * subset of a rectangle selection is fully defined. 
340 * <p> 341 * For example, a 4 X 5 dataset 342 * 343 * <pre> 344 * 0, 1, 2, 3, 4 345 * 10, 11, 12, 13, 14 346 * 20, 21, 22, 23, 24 347 * 30, 31, 32, 33, 34 348 * long[] dims = {4, 5}; 349 * long[] startDims = {1, 2}; 350 * long[] selectedDims = {3, 3}; 351 * long[] selectedStride = {1, 1}; 352 * then the following subset is selected by the startDims and selectedDims 353 * 12, 13, 14 354 * 22, 23, 24 355 * 32, 33, 34 356 * </pre> 357 * 358 * @return the dimension sizes of the selected subset. 359 */ 360 public final long[] getSelectedDims() { 361 if (rank < 0) init(); 362 363 return selectedDims; 364 } 365 366 /** 367 * Returns the starting position of a selected subset. 368 * <p> 369 * Applications can use this array to change the starting position of a 370 * selection. Combined with the selected dimensions, selected sizes and 371 * stride, the subset of a rectangle selection is fully defined. 372 * <p> 373 * For example, a 4 X 5 dataset 374 * 375 * <pre> 376 * 0, 1, 2, 3, 4 377 * 10, 11, 12, 13, 14 378 * 20, 21, 22, 23, 24 379 * 30, 31, 32, 33, 34 380 * long[] dims = {4, 5}; 381 * long[] startDims = {1, 2}; 382 * long[] selectedDims = {3, 3}; 383 * long[] selectedStride = {1, 1}; 384 * then the following subset is selected by the startDims and selectedDims 385 * 12, 13, 14 386 * 22, 23, 24 387 * 32, 33, 34 388 * </pre> 389 * 390 * @return the starting position of a selected subset. 391 */ 392 public final long[] getStartDims() { 393 if (rank < 0) init(); 394 395 return startDims; 396 } 397 398 /** 399 * Returns the selectedStride of the selected dataset. 400 * <p> 401 * Applications can use this array to change how many elements to move in 402 * each dimension. 403 * 404 * Combined with the starting position and selected sizes, the subset of a 405 * rectangle selection is defined. 
406 * <p> 407 * For example, a 4 X 5 dataset 408 * 409 * <pre> 410 * 0, 1, 2, 3, 4 411 * 10, 11, 12, 13, 14 412 * 20, 21, 22, 23, 24 413 * 30, 31, 32, 33, 34 414 * long[] dims = {4, 5}; 415 * long[] startDims = {0, 0}; 416 * long[] selectedDims = {2, 2}; 417 * long[] selectedStride = {2, 3}; 418 * then the following subset is selected by the startDims and selectedDims 419 * 0, 3 420 * 20, 23 421 * </pre> 422 * 423 * @return the selectedStride of the selected dataset. 424 */ 425 public final long[] getStride() { 426 if (rank < 0) init(); 427 428 if (rank <= 0) { 429 return null; 430 } 431 432 if (selectedStride == null) { 433 selectedStride = new long[rank]; 434 for (int i = 0; i < rank; i++) { 435 selectedStride[i] = 1; 436 } 437 } 438 439 return selectedStride; 440 } 441 442 /** 443 * Sets the flag that indicates if a byte array is converted to a string 444 * array. 445 * <p> 446 * In a string dataset, the raw data from file is stored in a byte array. By 447 * default, this byte array is converted to an array of strings. For a large 448 * dataset (e.g. more than one million strings), the converson takes a long 449 * time and requires a lot of memory space to store the strings. At some 450 * applications, such a conversion can be delayed. For example, A GUI 451 * application may convert only part of the strings that are visible to the 452 * users, not the entire data array. 453 * <p> 454 * setConvertByteToString(boolean b) allows users to set the flag so that 455 * applications can choose to perform the byte-to-string conversion or not. 456 * If the flag is set to false, the getData() returns a array of byte 457 * instead of an array of strings. 458 * 459 * @param b 460 * convert bytes to strings if b is true; otherwise, if false, do 461 * not convert bytes to strings. 
462 */ 463 public final void setConvertByteToString(boolean b) { 464 convertByteToString = b; 465 } 466 467 /** 468 * Returns the flag that indicates if a byte array is converted to a string 469 * array.. 470 * 471 * @return true if byte array is converted to string; otherwise, returns 472 * false if there is no conversion. 473 */ 474 public final boolean getConvertByteToString() { 475 return convertByteToString; 476 } 477 478 /** 479 * Reads the data from file. 480 * <p> 481 * read() reads the data from file to a memory buffer and returns the memory 482 * buffer. The dataset object does not hold the memory buffer. To store the 483 * memory buffer in the dataset object, one must call getData(). 484 * <p> 485 * By default, the whole dataset is read into memory. Users can also select 486 * subset to read. Subsetting is done in an implicit way. 487 * <p> 488 * <b>How to Select a Subset</b> 489 * <p> 490 * A selection is specified by three arrays: start, stride and count. 491 * <ol> 492 * <li>start: offset of a selection 493 * <li>stride: determining how many elements to move in each dimension 494 * <li>count: number of elements to select in each dimension 495 * </ol> 496 * getStartDims(), getStartDims() and getSelectedDims() returns the start, 497 * stride and count arrays respectively. Applications can make a selection 498 * by changing the values of the arrays. 499 * <p> 500 * The following example shows how to make a subset. In the example, the 501 * dataset is a 4-dimensional array of [200][100][50][10], i.e. 
     * dims[0]=200; dims[1]=100; dims[2]=50; dims[3]=10; <br>
     * We want to select every other data point in dims[1] and dims[2]
     *
     * <pre>
     * int rank = dataset.getRank(); // number of dimensions of the dataset
     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
     * long[] start = dataset.getStartDims(); // the offset of the selection
     * long[] stride = dataset.getStride(); // the stride of the dataset
     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for
     *                                                   // display
     *
     * // select dim1 and dim2 as 2D data for display, and slice through dim0
     * selectedIndex[0] = 1;
     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
     *
     * // reset the selection arrays
     * for (int i = 0; i &lt; rank; i++) {
     *     start[i] = 0;
     *     selected[i] = 1;
     *     stride[i] = 1;
     * }
     *
     * // set stride to 2 on dim1 and dim2 so that every other data point is
     * // selected.
     * stride[1] = 2;
     * stride[2] = 2;
     *
     * // set the selection size of dim1 and dim2
     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
     *
     * // when dataset.getData() is called, the selection above will be used since
     * // the dimension arrays are passed by reference. Changes of these arrays
     * // outside the dataset object directly change the values of these arrays
     * // in the dataset object.
     * </pre>
     * <p>
     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
     * short, int, float, double or String type based on the datatype of the
     * dataset.
     * <p>
     * For CompoundDS, the memory data object is a java.util.List object. Each
     * element of the list is a data array that corresponds to a compound field.
547 * <p> 548 * For example, if compound dataset "comp" has the following nested 549 * structure, and member datatypes 550 * 551 * <pre> 552 * comp --> m01 (int) 553 * comp --> m02 (float) 554 * comp --> nest1 --> m11 (char) 555 * comp --> nest1 --> m12 (String) 556 * comp --> nest1 --> nest2 --> m21 (long) 557 * comp --> nest1 --> nest2 --> m22 (double) 558 * </pre> 559 * 560 * getData() returns a list of six arrays: {int[], float[], char[], 561 * String[], long[] and double[]}. 562 * 563 * @return the data read from file. 564 * 565 * @see #getData() 566 * 567 * @throws Exception if object can not be read 568 * @throws OutOfMemoryError if memory is exhausted 569 */ 570 public abstract Object read() throws Exception, OutOfMemoryError; 571 572 /** 573 * Reads the raw data of the dataset from file to a byte array. 574 * <p> 575 * readBytes() reads raw data to an array of bytes instead of array of its 576 * datatype. For example, for an one-dimension 32-bit integer dataset of 577 * size 5, the readBytes() returns of a byte array of size 20 instead of an 578 * int array of 5. 579 * <p> 580 * readBytes() can be used to copy data from one dataset to another 581 * efficiently because the raw data is not converted to its native type, it 582 * saves memory space and CPU time. 583 * 584 * @return the byte array of the raw data. 585 * 586 * @throws Exception if data can not be read 587 */ 588 public abstract byte[] readBytes() throws Exception; 589 590 /** 591 * Writes a memory buffer to the dataset in file. 592 * 593 * @param buf 594 * the data to write 595 * 596 * @throws Exception if data can not be written 597 */ 598 public abstract void write(Object buf) throws Exception; 599 600 /** 601 * Writes the memory buffer of this dataset to file. 
602 * 603 * @throws Exception if buffer can not be written 604 */ 605 public final void write() throws Exception { 606 if (data != null) { 607 write(data); 608 } 609 } 610 611 /** 612 * Creates a new dataset and writes the data buffer to the new dataset. 613 * <p> 614 * This function allows applications to create a new dataset for a given 615 * data buffer. For example, users can select a specific interesting part 616 * from a large image and create a new image with the selection. 617 * <p> 618 * The new dataset retains the datatype and dataset creation properties of 619 * this dataset. 620 * 621 * @param pgroup 622 * the group which the dataset is copied to. 623 * @param name 624 * the name of the new dataset. 625 * @param dims 626 * the dimension sizes of the the new dataset. 627 * @param data 628 * the data values of the subset to be copied. 629 * 630 * @return the new dataset. 631 * 632 * @throws Exception if dataset can not be copied 633 */ 634 public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception; 635 636 /** 637 * Returns the datatype object of the dataset. 638 * 639 * @return the datatype object of the dataset. 640 */ 641 public abstract Datatype getDatatype(); 642 643 /** 644 * Returns the data buffer of the dataset in memory. 645 * <p> 646 * If data is already loaded into memory, returns the data; otherwise, calls 647 * read() to read data from file into a memory buffer and returns the memory 648 * buffer. 649 * <p> 650 * By default, the whole dataset is read into memory. Users can also select 651 * subset to read. Subsetting is done in an implicit way. 652 * <p> 653 * <b>How to Select a Subset</b> 654 * <p> 655 * A selection is specified by three arrays: start, stride and count. 
     * <ol>
     * <li>start: offset of a selection
     * <li>stride: determining how many elements to move in each dimension
     * <li>count: number of elements to select in each dimension
     * </ol>
     * getStartDims(), getStride() and getSelectedDims() return the start,
     * stride and count arrays respectively. Applications can make a selection
     * by changing the values of the arrays.
     * <p>
     * The following example shows how to make a subset. In the example, the
     * dataset is a 4-dimensional array of [200][100][50][10], i.e. dims[0]=200;
     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
     * We want to select every other data point in dims[1] and dims[2]
     *
     * <pre>
     * int rank = dataset.getRank(); // number of dimensions of the dataset
     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
     * long[] start = dataset.getStartDims(); // the offset of the selection
     * long[] stride = dataset.getStride(); // the stride of the dataset
     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for
     *                                                   // display
     *
     * // select dim1 and dim2 as 2D data for display, and slice through dim0
     * selectedIndex[0] = 1;
     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
     *
     * // reset the selection arrays
     * for (int i = 0; i &lt; rank; i++) {
     *     start[i] = 0;
     *     selected[i] = 1;
     *     stride[i] = 1;
     * }
     *
     * // set stride to 2 on dim1 and dim2 so that every other data point is
     * // selected.
     * stride[1] = 2;
     * stride[2] = 2;
     *
     * // set the selection size of dim1 and dim2
     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
     *
     * // when dataset.getData() is called, the selection above will be used since
     * // the dimension arrays are passed by reference.
Changes of these arrays 702 * // outside the dataset object directly change the values of these array 703 * // in the dataset object. 704 * </pre> 705 * <p> 706 * For ScalarDS, the memory data buffer is an one-dimensional array of byte, 707 * short, int, float, double or String type based on the datatype of the 708 * dataset. 709 * <p> 710 * For CompoundDS, the memory data object is an java.util.List object. Each 711 * element of the list is a data array that corresponds to a compound field. 712 * <p> 713 * For example, if compound dataset "comp" has the following nested 714 * structure, and memeber datatypes 715 * 716 * <pre> 717 * comp --> m01 (int) 718 * comp --> m02 (float) 719 * comp --> nest1 --> m11 (char) 720 * comp --> nest1 --> m12 (String) 721 * comp --> nest1 --> nest2 --> m21 (long) 722 * comp --> nest1 --> nest2 --> m22 (double) 723 * </pre> 724 * 725 * getData() returns a list of six arrays: {int[], float[], char[], 726 * String[], long[] and double[]}. 727 * 728 * @return the memory buffer of the dataset. 729 * 730 * @throws Exception if object can not be read 731 * @throws OutOfMemoryError if memory is exhausted 732 */ 733 public final Object getData() throws Exception, OutOfMemoryError { 734 if (!isDataLoaded) { 735 log.trace("getData: read"); 736 data = read(); // load the data; 737 originalBuf = data; 738 isDataLoaded = true; 739 nPoints = 1; 740 log.trace("getData: selectedDims length={}",selectedDims.length); 741 for (int j = 0; j < selectedDims.length; j++) { 742 nPoints *= selectedDims[j]; 743 } 744 log.trace("getData: read {}", nPoints); 745 } 746 747 return data; 748 } 749 750 /** 751 * @deprecated Not for public use in the future. 752 * <p> 753 * setData() is not safe to use because it changes memory buffer 754 * of the dataset object. Dataset operation such as write/read 755 * will fail if the buffer type or size is changed. 
756 * 757 * @param d the object data 758 */ 759 @Deprecated 760 public final void setData(Object d) { 761 data = d; 762 } 763 764 /** 765 * Clears the data buffer in memory and to force the next read() to load 766 * data from file. 767 * <p> 768 * The function read() loads data from file into memory only if the data is 769 * not read. If data is already in memory, read() just returns the memory 770 * buffer. Sometimes we want to force read() to re-read data from file. For 771 * example, when the selection is changed, we need to re-read the data. 772 * 773 * clearData() clears the current memory buffer and force the read() to load 774 * the data from file. 775 * 776 * @see #getData() 777 * @see #read() 778 */ 779 public void clearData() { 780 isDataLoaded = false; 781 } 782 783 /** 784 * Returns the dimension size of the vertical axis. 785 * 786 * <p> 787 * This function is used by GUI applications such as HDFView. GUI 788 * applications display a dataset in a 2D table or 2D image. The display 789 * order is specified by the index array of selectedIndex as follow: 790 * <dl> 791 * <dt>selectedIndex[0] -- height</dt> 792 * <dd>The vertical axis</dd> 793 * <dt>selectedIndex[1] -- width</dt> 794 * <dd>The horizontal axis</dd> 795 * <dt>selectedIndex[2] -- depth</dt> 796 * <dd>The depth axis is used for 3 or more dimensional datasets.</dd> 797 * </dl> 798 * Applications can use getSelectedIndex() to access and change the display 799 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 800 * following code will set the height=200 and width=50. 801 * 802 * <pre> 803 * int[] selectedIndex = dataset.getSelectedIndex(); 804 * selectedIndex[0] = 0; 805 * selectedIndex[1] = 1; 806 * </pre> 807 * 808 * @see #getSelectedIndex() 809 * @see #getWidth() 810 * 811 * @return the size of dimension of the vertical axis. 
812 */ 813 public final int getHeight() { 814 if (rank < 0) init(); 815 816 if ((selectedDims == null) || (selectedIndex == null)) { 817 return 0; 818 } 819 820 return (int) selectedDims[selectedIndex[0]]; 821 } 822 823 /** 824 * Returns the size of dimension of the horizontal axis. 825 * 826 * <p> 827 * This function is used by GUI applications such as HDFView. GUI 828 * applications display dataset a 2D Table or 2D Image. The display order is 829 * specified by the index array of selectedIndex as follow: 830 * <dl> 831 * <dt>selectedIndex[0] -- height</dt> 832 * <dd>The vertical axis</dd> 833 * <dt>selectedIndex[1] -- width</dt> 834 * <dd>The horizontal axis</dd> 835 * <dt>selectedIndex[2] -- depth</dt> 836 * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd> 837 * </dl> 838 * Applications can use getSelectedIndex() to access and change the display 839 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 840 * following code will set the height=200 and width=100. 841 * 842 * <pre> 843 * int[] selectedIndex = dataset.getSelectedIndex(); 844 * selectedIndex[0] = 0; 845 * selectedIndex[1] = 1; 846 * </pre> 847 * 848 * @see #getSelectedIndex() 849 * @see #getHeight() 850 * 851 * @return the size of dimension of the horizontal axis. 852 */ 853 public final int getWidth() { 854 if (rank < 0) init(); 855 856 if ((selectedDims == null) || (selectedIndex == null)) { 857 return 0; 858 } 859 860 if ((selectedDims.length < 2) || (selectedIndex.length < 2)) { 861 return 1; 862 } 863 864 return (int) selectedDims[selectedIndex[1]]; 865 } 866 867 /** 868 * Returns the indices of display order. 869 * <p> 870 * 871 * selectedIndex[] is provided for two purpose: 872 * <OL> 873 * <LI> 874 * selectedIndex[] is used to indicate the order of dimensions for display. 875 * selectedIndex[0] is for the row, selectedIndex[1] is for the column and 876 * selectedIndex[2] for the depth. 
877 * <p> 878 * For example, for a four dimesion dataset, if selectedIndex[] = {1, 2, 3}, 879 * then dim[1] is selected as row index, dim[2] is selected as column index 880 * and dim[3] is selected as depth index. 881 * <LI> 882 * selectedIndex[] is also used to select dimensions for display for 883 * datasets with three or more dimensions. We assume that applications such 884 * as HDFView can only display data values up to three dimension (2D 885 * spreadsheet/image with a third dimension which the 2D spreadsheet/image 886 * is selected from). For dataset with more than three dimensions, we need 887 * selectedIndex[] to tell applications which three dimensions are chosen 888 * for display. <br> 889 * For example, for a four dimesion dataset, if selectedIndex[] = {1, 2, 3}, 890 * then dim[1] is selected as row index, dim[2] is selected as column index 891 * and dim[3] is selected as depth index. dim[0] is not selected. Its 892 * location is fixed at 0 by default. 893 * </OL> 894 * 895 * @return the array of the indices of display order. 896 */ 897 public final int[] getSelectedIndex() { 898 if (rank < 0) init(); 899 900 return selectedIndex; 901 } 902 903 /** 904 * Returns the string representation of compression information. 905 * <p> 906 * For example, 907 * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED". 908 * 909 * @return the string representation of compression information. 910 */ 911 public final String getCompression() { 912 if (rank < 0) init(); 913 914 return compression; 915 } 916 917 /** 918 * Returns the string representation of filter information. 919 * 920 * @return the string representation of filter information. 921 */ 922 public final String getFilters() { 923 if (rank < 0) init(); 924 925 return filters; 926 } 927 928 /** 929 * Returns the string representation of storage information. 930 * 931 * @return the string representation of storage information. 
932 */ 933 public final String getStorage() { 934 if (rank < 0) init(); 935 936 return storage; 937 } 938 939 /** 940 * Returns the array that contains the dimension sizes of the chunk of the 941 * dataset. Returns null if the dataset is not chunked. 942 * 943 * @return the array of chunk sizes or returns null if the dataset is not 944 * chunked. 945 */ 946 public final long[] getChunkSize() { 947 if (rank < 0) init(); 948 949 return chunkSize; 950 } 951 952 /** 953 * @deprecated Not for public use in the future. <br> 954 * Using {@link #convertFromUnsignedC(Object, Object)} 955 * 956 * @param data_in the object data 957 * 958 * @return the converted object 959 */ 960 @Deprecated 961 public static Object convertFromUnsignedC(Object data_in) { 962 return Dataset.convertFromUnsignedC(data_in, null); 963 } 964 965 /** 966 * Converts one-dimension array of unsigned C-type integers to a new array 967 * of appropriate Java integer in memory. 968 * <p> 969 * Since Java does not support unsigned integer, values of unsigned C-type 970 * integers must be converted into its appropriate Java integer. Otherwise, 971 * the data value will not displayed correctly. For example, if an unsigned 972 * C byte, x = 200, is stored into an Java byte y, y will be -56 instead of 973 * the correct value of 200. 
* <p>
     * Unsigned C integers are upgraded to Java integers according to the
     * following table:
     * <TABLE CELLSPACING=0 BORDER=1 CELLPADDING=5 WIDTH=400>
     * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption>
     * <TR>
     * <TD><B>Unsigned C Integer</B></TD>
     * <TD><B>Java Integer</B></TD>
     * </TR>
     * <TR>
     * <TD>unsigned byte</TD>
     * <TD>signed short</TD>
     * </TR>
     * <TR>
     * <TD>unsigned short</TD>
     * <TD>signed int</TD>
     * </TR>
     * <TR>
     * <TD>unsigned int</TD>
     * <TD>signed long</TD>
     * </TR>
     * <TR>
     * <TD>unsigned long</TD>
     * <TD>signed long</TD>
     * </TR>
     * </TABLE>
     * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers.
     * Therefore, the values of unsigned 64-bit datasets may be wrong in Java
     * applications</strong>.
     * <p>
     * If memory data of unsigned integers is converted by
     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
     * the data back to unsigned C before data is written into file.
     *
     * @see #convertToUnsignedC(Object, Object)
     *
     * @param data_in
     *            the input 1D array of the unsigned C-type integers.
     * @param data_out
     *            the output converted (or upgraded) 1D array of Java integers.
     *
     * @return the upgraded 1D array of Java integers.
1016 */ 1017 public static Object convertFromUnsignedC(Object data_in, Object data_out) { 1018 if (data_in == null) { 1019 return null; 1020 } 1021 1022 Class data_class = data_in.getClass(); 1023 if (!data_class.isArray()) { 1024 return null; 1025 } 1026 1027 if (data_out != null) { 1028 Class data_class_out = data_out.getClass(); 1029 if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) { 1030 data_out = null; 1031 } 1032 } 1033 1034 String cname = data_class.getName(); 1035 char dname = cname.charAt(cname.lastIndexOf("[") + 1); 1036 int size = Array.getLength(data_in); 1037 log.trace("convertFromUnsignedC: cname={} dname={} size={}", cname, dname, size); 1038 1039 if (dname == 'B') { 1040 short[] sdata = null; 1041 if (data_out == null) { 1042 sdata = new short[size]; 1043 } 1044 else { 1045 sdata = (short[]) data_out; 1046 } 1047 1048 byte[] bdata = (byte[]) data_in; 1049 for (int i = 0; i < size; i++) { 1050 sdata[i] = (short) ((bdata[i] + 256) & 0xFF); 1051 } 1052 1053 data_out = sdata; 1054 } 1055 else if (dname == 'S') { 1056 int[] idata = null; 1057 if (data_out == null) { 1058 idata = new int[size]; 1059 } 1060 else { 1061 idata = (int[]) data_out; 1062 } 1063 1064 short[] sdata = (short[]) data_in; 1065 for (int i = 0; i < size; i++) { 1066 idata[i] = (sdata[i] + 65536) & 0xFFFF; 1067 } 1068 1069 data_out = idata; 1070 } 1071 else if (dname == 'I') { 1072 long[] ldata = null; 1073 if (data_out == null) { 1074 ldata = new long[size]; 1075 } 1076 else { 1077 ldata = (long[]) data_out; 1078 } 1079 1080 int[] idata = (int[]) data_in; 1081 for (int i = 0; i < size; i++) { 1082 ldata[i] = (idata[i] + 4294967296L) & 0xFFFFFFFFL; 1083 } 1084 1085 data_out = ldata; 1086 } 1087 else { 1088 data_out = data_in; 1089 log.debug("convertFromUnsignedC: Java does not support unsigned long"); 1090 } 1091 1092 return data_out; 1093 } 1094 1095 /** 1096 * @deprecated Not for public use in the future. 
<br> 1097 * Using {@link #convertToUnsignedC(Object, Object)} 1098 * 1099 * @param data_in 1100 * the input 1D array of the unsigned C-type integers. 1101 * 1102 * @return the upgraded 1D array of Java integers. 1103 */ 1104 @Deprecated 1105 public static Object convertToUnsignedC(Object data_in) { 1106 return Dataset.convertToUnsignedC(data_in, null); 1107 } 1108 1109 /** 1110 * Converts the array of converted unsigned integer back to unsigned C-type 1111 * integer data in memory. 1112 * <p> 1113 * If memory data of unsigned integers is converted by 1114 * convertFromUnsignedC(), convertToUnsignedC() must be called to convert 1115 * the data back to unsigned C before data is written into file. 1116 * 1117 * @see #convertFromUnsignedC(Object, Object) 1118 * 1119 * @param data_in 1120 * the input array of the Java integer. 1121 * @param data_out 1122 * the output array of the unsigned C-type integer. 1123 * 1124 * @return the converted data of unsigned C-type integer array. 1125 */ 1126 public static Object convertToUnsignedC(Object data_in, Object data_out) { 1127 if (data_in == null) { 1128 return null; 1129 } 1130 1131 Class data_class = data_in.getClass(); 1132 if (!data_class.isArray()) { 1133 return null; 1134 } 1135 1136 if (data_out != null) { 1137 Class data_class_out = data_out.getClass(); 1138 if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) { 1139 data_out = null; 1140 } 1141 } 1142 1143 String cname = data_class.getName(); 1144 char dname = cname.charAt(cname.lastIndexOf("[") + 1); 1145 int size = Array.getLength(data_in); 1146 log.trace("convertToUnsignedC: cname={} dname={} size={}", cname, dname, size); 1147 1148 if (dname == 'S') { 1149 byte[] bdata = null; 1150 if (data_out == null) { 1151 bdata = new byte[size]; 1152 } 1153 else { 1154 bdata = (byte[]) data_out; 1155 } 1156 short[] sdata = (short[]) data_in; 1157 for (int i = 0; i < size; i++) { 1158 bdata[i] = (byte) sdata[i]; 1159 } 1160 data_out = bdata; 
1161 } 1162 else if (dname == 'I') { 1163 short[] sdata = null; 1164 if (data_out == null) { 1165 sdata = new short[size]; 1166 } 1167 else { 1168 sdata = (short[]) data_out; 1169 } 1170 int[] idata = (int[]) data_in; 1171 for (int i = 0; i < size; i++) { 1172 sdata[i] = (short) idata[i]; 1173 } 1174 data_out = sdata; 1175 } 1176 else if (dname == 'J') { 1177 int[] idata = null; 1178 if (data_out == null) { 1179 idata = new int[size]; 1180 } 1181 else { 1182 idata = (int[]) data_out; 1183 } 1184 long[] ldata = (long[]) data_in; 1185 for (int i = 0; i < size; i++) { 1186 idata[i] = (int) ldata[i]; 1187 } 1188 data_out = idata; 1189 } 1190 else { 1191 data_out = data_in; 1192 log.debug("convertToUnsignedC: Java does not support unsigned long"); 1193 } 1194 1195 return data_out; 1196 } 1197 1198 /** 1199 * Converts an array of bytes into an array of Strings for a fixed string 1200 * dataset. 1201 * <p> 1202 * A C-string is an array of chars while an Java String is an object. When a 1203 * string dataset is read into Java application, the data is stored in an 1204 * array of Java bytes. byteToString() is used to convert the array of bytes 1205 * into array of Java strings so that applications can display and modify 1206 * the data content. 1207 * <p> 1208 * For example, the content of a two element C string dataset is {"ABC", 1209 * "abc"}. Java applications will read the data into an byte array of {65, 1210 * 66, 67, 97, 98, 99). byteToString(bytes, 3) returns an array of Java 1211 * String of strs[0]="ABC", and strs[1]="abc". 1212 * <p> 1213 * If memory data of strings is converted to Java Strings, stringToByte() 1214 * must be called to convert the memory data back to byte array before data 1215 * is written to file. 1216 * 1217 * @see #stringToByte(String[], int) 1218 * 1219 * @param bytes 1220 * the array of bytes to convert. 1221 * @param length 1222 * the length of string. 1223 * 1224 * @return the array of Java String. 
1225 */ 1226 public static final String[] byteToString(byte[] bytes, int length) { 1227 if (bytes == null) { 1228 return null; 1229 } 1230 1231 int n = bytes.length / length; 1232 log.trace("byteToString: n={} from length of {}", n, length); 1233 // String bigstr = new String(bytes); 1234 String[] strArray = new String[n]; 1235 String str = null; 1236 int idx = 0; 1237 for (int i = 0; i < n; i++) { 1238 str = new String(bytes, i * length, length); 1239 // bigstr.substring uses less memory space 1240 // NOTE: bigstr does not work on linux if bytes.length is very large 1241 // see bug 1091 1242 // offset = i*length; 1243 // str = bigstr.substring(offset, offset+length); 1244 1245 idx = str.indexOf('\0'); 1246 if (idx > 0) { 1247 str = str.substring(0, idx); 1248 } 1249 1250 // trim only the end 1251 int end = str.length(); 1252 while (end > 0 && str.charAt(end - 1) <= '\u0020') 1253 end--; 1254 1255 strArray[i] = (end <= 0) ? "" : str.substring(0, end); 1256 1257 // trim both start and end 1258 // strArray[i] = str.trim(); 1259 } 1260 1261 return strArray; 1262 } 1263 1264 /** 1265 * Converts a string array into an array of bytes for a fixed string 1266 * dataset. 1267 * <p> 1268 * If memory data of strings is converted to Java Strings, stringToByte() 1269 * must be called to convert the memory data back to byte array before data 1270 * is written to file. 1271 * 1272 * @see #byteToString(byte[] bytes, int length) 1273 * 1274 * @param strings 1275 * the array of string. 1276 * @param length 1277 * the length of string. 1278 * 1279 * @return the array of bytes. 
1280 */ 1281 public static final byte[] stringToByte(String[] strings, int length) { 1282 if (strings == null) { 1283 return null; 1284 } 1285 1286 int size = strings.length; 1287 byte[] bytes = new byte[size * length]; 1288 log.trace("stringToByte: size={} length={}", size, length); 1289 StringBuffer strBuff = new StringBuffer(length); 1290 for (int i = 0; i < size; i++) { 1291 // initialize the string with spaces 1292 strBuff.replace(0, length, " "); 1293 1294 if (strings[i] != null) { 1295 if (strings[i].length() > length) { 1296 strings[i] = strings[i].substring(0, length); 1297 } 1298 strBuff.replace(0, length, strings[i]); 1299 } 1300 1301 strBuff.setLength(length); 1302 System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length); 1303 } 1304 1305 return bytes; 1306 } 1307 1308 /** 1309 * Returns the array of strings that represent the dimension names. Returns 1310 * null if there is no dimension name. 1311 * <p> 1312 * Some datasets have pre-defined names for each dimension such as 1313 * "Latitude" and "Longitude". getDimNames() returns these pre-defined 1314 * names. 1315 * 1316 * @return the names of dimensions, or null if there is no dimension name. 1317 */ 1318 public final String[] getDimNames() { 1319 if (rank < 0) init(); 1320 1321 return dimNames; 1322 } 1323 1324 /** 1325 * Checks if a given datatype is a string. Sub-classes must replace this 1326 * default implementation. 1327 * 1328 * @param tid 1329 * The data type identifier. 1330 * 1331 * @return true if the datatype is a string; otherwise returns false. 1332 */ 1333 public boolean isString(int tid) { 1334 return false; 1335 } 1336 1337 /** 1338 * Returns the size in bytes of a given datatype. Sub-classes must replace 1339 * this default implementation. 1340 * 1341 * @param tid 1342 * The data type identifier. 
*
     * @return The size of the datatype
     */
    public int getSize(int tid) {
        return -1;
    }

    /**
     * Get flag that indicates if enum data is converted to strings.
     *
     * @return the enumConverted
     */
    public boolean isEnumConverted() {
        return enumConverted;
    }

    /**
     * Set flag that indicates if enum data is converted to strings.
     * <p>
     * When the new value differs from the current one, the original and
     * converted buffers are dropped and {@code clearData()} is called before
     * the flag is updated.
     *
     * @param b
     *            the enumConverted to set
     */
    public void setEnumConverted(boolean b) {
        final boolean changed = (enumConverted != b);
        if (changed) {
            convertedBuf = null;
            originalBuf = null;
            this.clearData();
        }

        enumConverted = b;
    }

    /**
     * Get Class of the original data buffer if converted.
     * <p>
     * The original buffer may be absent, for instance right after
     * {@link #setEnumConverted(boolean)} has changed the flag and cleared it.
     * In that case null is returned instead of failing with a
     * NullPointerException.
     *
     * @return the Class of originalBuf, or null if there is no original
     *         buffer.
     */
    public final Class getOriginalClass() {
        return (originalBuf == null) ? null : originalBuf.getClass();
    }
}