001/* 002// This software is subject to the terms of the Eclipse Public License v1.0 003// Agreement, available at the following URL: 004// http://www.eclipse.org/legal/epl-v10.html. 005// You must accept the terms of that agreement to use this software. 006// 007// Copyright (C) 2001-2005 Julian Hyde 008// Copyright (C) 2005-2012 Pentaho and others 009// All Rights Reserved. 010// 011// jhyde, 28 August, 2001 012*/ 013package mondrian.rolap.agg; 014 015import mondrian.olap.*; 016import mondrian.rolap.*; 017 018import java.util.*; 019import java.util.concurrent.Future; 020 021/** 022 * A <code>Aggregation</code> is a pre-computed aggregation over a set of 023 * columns. 024 * 025 * <p>Rollup operations:<ul> 026 * <li>drop an unrestricted column (e.g. state=*)</li> 027 * <li>tighten any restriction (e.g. year={1997,1998} becomes 028 * year={1997})</li> 029 * <li>restrict an unrestricted column (e.g. year=* becomes 030 * year={1997})</li> 031 * </ul> 032 * 033 * <p>Representation of aggregations. Sparse and dense representations are 034 * necessary for different data sets. Should adapt automatically. Use an 035 * interface to hold the data set, so the segment doesn't care.</p> 036 * 037 * Suppose we have a segment {year=1997, quarter={1,2,3}, 038 * state={CA,WA}}. We want to roll up to a segment for {year=1997, 039 * state={CA,WA}}. We need to know that we have all quarters. We don't. 040 * Because year and quarter are independent, we know that we have all of 041 * the ...</p> 042 * 043 * <p>Suppose we have a segment specified by {region=West, state=*, 044 * year=*}, which materializes to ({West}, {CA,WA,OR}, {1997,1998}). 045 * Because state=*, we can rollup to {region=West, year=*} or {region=West, 046 * year=1997}.</p> 047 * 048 * <p>The space required for a segment depends upon the dimensionality (d), 049 * cell count (c) and the value count (v). We don't count the space 050 * required for the actual values, which is the same in any scheme.</p> 051 * 052 * @author jhyde 053 * @since 28 August, 2001 054 */ 055public class Aggregation { 056 057 private final List<StarPredicate> compoundPredicateList; 058 private final RolapStar star; 059 private final BitKey constrainedColumnsBitKey; 060 061 /** 062 * Setting for optimizing SQL predicates. 063 */ 064 private final int maxConstraints; 065 066 /** 067 * Timestamp of when the aggregation was created. (We use 068 * {@link java.util.Date} rather than {@link java.sql.Timestamp} because it 069 * has less baggage.) 070 */ 071 private final Date creationTimestamp; 072 073 /** 074 * Creates an Aggregation. 075 * 076 * @param aggregationKey the key specifying the axes, the context and 077 * the RolapStar for this Aggregation 078 */ 079 public Aggregation( 080 AggregationKey aggregationKey) 081 { 082 this.compoundPredicateList = aggregationKey.getCompoundPredicateList(); 083 this.star = aggregationKey.getStar(); 084 this.constrainedColumnsBitKey = 085 aggregationKey.getConstrainedColumnsBitKey(); 086 this.maxConstraints = 087 MondrianProperties.instance().MaxConstraints.get(); 088 this.creationTimestamp = new Date(); 089 } 090 091 /** 092 * @return Returns the timestamp when the aggregation was created 093 */ 094 public Date getCreationTimestamp() { 095 return creationTimestamp; 096 } 097 098 /** 099 * Loads a set of segments into this aggregation, one per measure, 100 * each constrained by the same set of column values, and each pinned 101 * once. 102 * 103 * <p>A Column and its constraints are accessed at the same level in their 104 * respective arrays. 105 * 106 * <p>For example, 107 * <blockquote><pre> 108 * measures = {unit_sales, store_sales}, 109 * state = {CA, OR}, 110 * gender = unconstrained</pre></blockquote> 111 * 112 * @param segmentFutures List of futures wherein each statement will place 113 * a list of the segments it has loaded, when it 114 * completes 115 */ 116 public void load( 117 SegmentCacheManager cacheMgr, 118 int cellRequestCount, 119 RolapStar.Column[] columns, 120 List<RolapStar.Measure> measures, 121 StarColumnPredicate[] predicates, 122 GroupingSetsCollector groupingSetsCollector, 123 List<Future<Map<Segment, SegmentWithData>>> segmentFutures) 124 { 125 BitKey measureBitKey = getConstrainedColumnsBitKey().emptyCopy(); 126 int axisCount = columns.length; 127 Util.assertTrue(predicates.length == axisCount); 128 129 List<Segment> segments = 130 createSegments( 131 columns, measures, measureBitKey, predicates); 132 133 // The constrained columns are simply the level and foreign columns 134 BitKey levelBitKey = getConstrainedColumnsBitKey(); 135 GroupingSet groupingSet = 136 new GroupingSet( 137 segments, levelBitKey, measureBitKey, predicates, columns); 138 if (groupingSetsCollector.useGroupingSets()) { 139 groupingSetsCollector.add(groupingSet); 140 } else { 141 final SegmentLoader segmentLoader = new SegmentLoader(cacheMgr); 142 segmentLoader.load( 143 cellRequestCount, 144 new ArrayList<GroupingSet>( 145 Collections.singletonList(groupingSet)), 146 compoundPredicateList, 147 segmentFutures); 148 } 149 } 150 151 private List<Segment> createSegments( 152 RolapStar.Column[] columns, 153 List<RolapStar.Measure> measures, 154 BitKey measureBitKey, 155 StarColumnPredicate[] predicates) 156 { 157 List<Segment> segments = new ArrayList<Segment>(measures.size()); 158 for (RolapStar.Measure measure : measures) { 159 measureBitKey.set(measure.getBitPosition()); 160 Segment segment = 161 new Segment( 162 star, 163 constrainedColumnsBitKey, 164 columns, 165 measure, 166 predicates, 167 Collections.<Segment.ExcludedRegion>emptyList(), 168 compoundPredicateList); 169 segments.add(segment); 170 } 171 // It is important to sort the segments per measure bitkey. 172 // The order in which the measures come in is not deterministic. 173 // It actually depends on the order of the CellRequests. 174 // See: mondrian.rolap.BatchLoader.Batch.add(CellRequest request). 175 // Failure to sort them will give out wrong results (uses the wrong 176 // column) if we have more than one column in the grouping set. 177 Collections.sort( 178 segments, new Comparator<Segment>() { 179 public int compare(Segment o1, Segment o2) { 180 return Integer.valueOf( 181 o1.measure.getBitPosition()) 182 .compareTo(o2.measure.getBitPosition()); 183 } 184 }); 185 return segments; 186 } 187 188 /** 189 * Drops predicates, where the list of values is close to the values which 190 * would be returned anyway. 191 */ 192 public StarColumnPredicate[] optimizePredicates( 193 RolapStar.Column[] columns, 194 StarColumnPredicate[] predicates) 195 { 196 RolapStar star = getStar(); 197 Util.assertTrue(predicates.length == columns.length); 198 StarColumnPredicate[] newPredicates = predicates.clone(); 199 double[] bloats = new double[columns.length]; 200 201 // We want to handle the special case "drilldown" which occurs pretty 202 // often. Here, the parent is here as a constraint with a single member 203 // and the list of children as well. 204 List<Member> potentialParents = new ArrayList<Member>(); 205 for (final StarColumnPredicate predicate : predicates) { 206 Member m; 207 if (predicate instanceof MemberColumnPredicate) { 208 m = ((MemberColumnPredicate) predicate).getMember(); 209 potentialParents.add(m); 210 } 211 } 212 213 for (int i = 0; i < newPredicates.length; i++) { 214 // A set of constraints with only one entry will not be optimized 215 // away 216 if (!(newPredicates[i] instanceof ListColumnPredicate)) { 217 bloats[i] = 0.0; 218 continue; 219 } 220 221 final ListColumnPredicate newPredicate = 222 (ListColumnPredicate) newPredicates[i]; 223 final List<StarColumnPredicate> predicateList = 224 newPredicate.getPredicates(); 225 final int valueCount = predicateList.size(); 226 if (valueCount < 2) { 227 bloats[i] = 0.0; 228 continue; 229 } 230 231 if (valueCount > maxConstraints) { 232 // Some databases can handle only a limited number of elements 233 // in 'WHERE IN (...)'. This set is greater than this database 234 // can handle, so we drop this constraint. Hopefully there are 235 // other constraints that will limit the result. 236 bloats[i] = 1.0; // will be optimized away 237 continue; 238 } 239 240 // more than one - check for children of same parent 241 double constraintLength = (double) valueCount; 242 Member parent = null; 243 Level level = null; 244 for (int j = 0; j < valueCount; j++) { 245 Object value = predicateList.get(j); 246 if (value instanceof MemberColumnPredicate) { 247 MemberColumnPredicate memberColumnPredicate = 248 (MemberColumnPredicate) value; 249 Member m = memberColumnPredicate.getMember(); 250 if (j == 0) { 251 parent = m.getParentMember(); 252 level = m.getLevel(); 253 } else { 254 if (parent != null 255 && !parent.equals(m.getParentMember())) 256 { 257 parent = null; // no common parent 258 } 259 if (level != null 260 && !level.equals(m.getLevel())) 261 { 262 // should never occur, constraints are of same level 263 level = null; 264 } 265 } 266 } else { 267 // Value constraint with no associated member. 268 // Compute bloat by #constraints / column cardinality. 269 parent = null; 270 level = null; 271 bloats[i] = constraintLength / columns[i].getCardinality(); 272 break; 273 } 274 } 275 boolean done = false; 276 if (parent != null) { 277 // common parent exists 278 if (parent.isAll() || potentialParents.contains(parent)) { 279 // common parent is there as constraint 280 // if the children are complete, this constraint set is 281 // unneccessary try to get the children directly from 282 // cache for the drilldown case, the children will be 283 // in the cache 284 // - if not, forget this optimization. 285 SchemaReader scr = star.getSchema().getSchemaReader(); 286 int childCount = scr.getChildrenCountFromCache(parent); 287 if (childCount == -1) { 288 // nothing gotten from cache 289 if (!parent.isAll()) { 290 // parent is in constraints 291 // no information about children cardinality 292 // constraints must not be optimized away 293 bloats[i] = 0.0; 294 done = true; 295 } 296 } else { 297 bloats[i] = constraintLength / childCount; 298 done = true; 299 } 300 } 301 } 302 303 if (!done && level != null) { 304 // if the level members are cached, we do not need "count *" 305 SchemaReader scr = star.getSchema().getSchemaReader(); 306 int memberCount = scr.getLevelCardinality(level, true, false); 307 if (memberCount > 0) { 308 bloats[i] = constraintLength / memberCount; 309 done = true; 310 } 311 } 312 313 if (!done) { 314 bloats[i] = constraintLength / columns[i].getCardinality(); 315 } 316 } 317 318 // build a list of constraints sorted by 'bloat factor' 319 ConstraintComparator comparator = new ConstraintComparator(bloats); 320 Integer[] indexes = new Integer[columns.length]; 321 for (int i = 0; i < columns.length; i++) { 322 indexes[i] = i; 323 } 324 325 // sort indexes by bloat descending 326 Arrays.sort(indexes, comparator); 327 328 // Eliminate constraints one by one, until the constrained cell count 329 // became half of the unconstrained cell count. We can not have an 330 // absolute value here, because its 331 // very different if we fetch data for 2 years or 10 years (5 times 332 // more means 5 times slower). So a relative comparison is ok here 333 // but not an absolute one. 334 335 double abloat = 1.0; 336 final double aBloatLimit = .5; 337 338 for (Integer j : indexes) { 339 abloat = abloat * bloats[j]; 340 if (abloat <= aBloatLimit) { 341 break; 342 } 343 // eliminate this constraint 344 if (MondrianProperties.instance().OptimizePredicates.get() 345 || bloats[j] == 1) 346 { 347 newPredicates[j] = new LiteralStarPredicate(columns[j], true); 348 } 349 } 350 351 // Now do simple structural optimizations, e.g. convert a list predicate 352 // with one element to a value predicate. 353 for (int i = 0; i < newPredicates.length; i++) { 354 newPredicates[i] = StarPredicates.optimize(newPredicates[i]); 355 } 356 357 return newPredicates; 358 } 359 360 /** 361 * This is called during SQL generation. 362 */ 363 public RolapStar getStar() { 364 return star; 365 } 366 367 /** 368 * Returns the BitKey for ALL columns (Measures and Levels) involved in the 369 * query. 370 */ 371 public BitKey getConstrainedColumnsBitKey() { 372 return constrainedColumnsBitKey; 373 } 374 375 // -- classes ------------------------------------------------------------- 376 377 /** 378 * Helper class to figure out which axis values evaluate to true at least 379 * once by a given predicate. 380 * 381 * <p>Consider, for example, the flush predicate<blockquote><code> 382 * 383 * member between [Time].[1997].[Q3] and [Time].[1999].[Q1] 384 * 385 * </code></blockquote>applied to the segment <blockquote><code> 386 * 387 * year in (1996, 1997, 1998, 1999)<br/> 388 * quarter in (Q1, Q2, Q3, Q4) 389 * 390 * </code></blockquote> The predicate evaluates to true for the pairs 391 * <blockquote><code> 392 * 393 * {(1997, Q3), (1997, Q4), 394 * (1998, Q1), (1998, Q2), (1998, Q3), (1998, Q4), (1999, Q1)} 395 * 396 * </code></blockquote> and therefore we wish to eliminate these pairs from 397 * the segment. But we can eliminate a value only if <em>all</em> of its 398 * values are eliminated. 399 * 400 * <p>In this case, year=1998 is the only value which can be eliminated from 401 * the segment. 402 */ 403 private static class ValuePruner { 404 /** 405 * Multi-column predicate. If the predicate evaluates to true, a cell 406 * will be removed from the segment. But we can only eliminate a value 407 * if all of its cells are eliminated. 408 */ 409 private final StarPredicate flushPredicate; 410 /** 411 * Number of columns predicate depends on. 412 */ 413 private final int arity; 414 /** 415 * For each column, the segment axis which the column corresponds to, or 416 * null. 417 */ 418 private final SegmentAxis[] axes; 419 /** 420 * For each column, a bitmap of values for which the predicate is 421 * sometimes false. These values cannot be eliminated from the axis. 422 */ 423 private final BitSet[] keepBitSets; 424 /** 425 * For each segment axis, the predicate column which depends on the 426 * axis, or -1. 427 */ 428 private final int[] axisInverseOrdinals; 429 /** 430 * Workspace which contains the current key value for each column. 431 */ 432 private final Object[] values; 433 /** 434 * View onto {@link #values} as a list. 435 */ 436 private final List<Object> valueList; 437 /** 438 * Workspace which contains the ordinal of the current value of each 439 * column on its axis. 440 */ 441 private final int[] ordinals; 442 443 private final SegmentDataset data; 444 445 private final CellKey cellKey; 446 447 /** 448 * Creates a ValuePruner. 449 * 450 * @param flushPredicate Multi-column predicate to test 451 * @param segmentAxes Axes of the segment. (The columns that the 452 * predicate may not be present, or may 453 * be in a different order.) 454 * @param data Segment dataset, which allows pruner 455 * to determine whether a particular 456 * cell is currently empty 457 */ 458 ValuePruner( 459 StarPredicate flushPredicate, 460 SegmentAxis[] segmentAxes, 461 SegmentDataset data) 462 { 463 this.flushPredicate = flushPredicate; 464 this.arity = flushPredicate.getConstrainedColumnList().size(); 465 this.axes = new SegmentAxis[arity]; 466 this.keepBitSets = new BitSet[arity]; 467 this.axisInverseOrdinals = new int[segmentAxes.length]; 468 Arrays.fill(axisInverseOrdinals, -1); 469 this.values = new Object[arity]; 470 this.valueList = Arrays.asList(values); 471 this.ordinals = new int[arity]; 472 assert data != null; 473 this.data = data; 474 this.cellKey = CellKey.Generator.newCellKey(segmentAxes.length); 475 476 // Pair up constraint columns with axes. If one of the constraint's 477 // columns is not in this segment, it gets the null axis. The 478 // constraint will have to evaluate to true for all possible values 479 // of that column. 480 for (int i = 0; i < arity; i++) { 481 RolapStar.Column column = 482 flushPredicate.getConstrainedColumnList().get(i); 483 int axisOrdinal = 484 findAxis(segmentAxes, column.getBitPosition()); 485 if (axisOrdinal < 0) { 486 this.axes[i] = null; 487 values[i] = StarPredicate.WILDCARD; 488 keepBitSets[i] = new BitSet(1); // dummy 489 } else { 490 axes[i] = segmentAxes[axisOrdinal]; 491 axisInverseOrdinals[axisOrdinal] = i; 492 final int keyCount = axes[i].getKeys().length; 493 keepBitSets[i] = new BitSet(keyCount); 494 } 495 } 496 } 497 498 private int findAxis(SegmentAxis[] axes, int bitPosition) { 499 for (int i = 0; i < axes.length; i++) { 500 SegmentAxis axis = axes[i]; 501 if (axis.getPredicate().getConstrainedColumn().getBitPosition() 502 == bitPosition) 503 { 504 return i; 505 } 506 } 507 return -1; 508 } 509 510 /** 511 * Applies this ValuePruner's predicate and sets bits in axisBitSets 512 * to indicate extra values which can be removed. 513 * 514 * @param axisKeepBitSets Array containing, for each axis, a bitset 515 * of values to keep (not flush) 516 */ 517 void go(BitSet[] axisKeepBitSets) { 518 evaluatePredicate(0); 519 520 // Clear bits in the axis bit sets (indicating that a value is never 521 // used) if this predicate evaluates to true for every combination 522 // of values which this axis value appears in. 523 for (int i = 0; i < axisKeepBitSets.length; i++) { 524 if (axisInverseOrdinals[i] < 0) { 525 continue; 526 } 527 BitSet axisKeepBitSet = axisKeepBitSets[axisInverseOrdinals[i]]; 528 final BitSet keepBitSet = keepBitSets[i]; 529 axisKeepBitSet.and(keepBitSet); 530 } 531 } 532 533 /** 534 * Evaluates the predicate for axes <code>i</code> and higher, and marks 535 * {@link #keepBitSets} if the predicate ever evaluates to false. 536 * The result is that discardBitSets[i] will be false for column #i if 537 * the predicate evaluates to true for all cells in the segment which 538 * have that column value. 539 * 540 * @param axisOrdinal Axis ordinal 541 */ 542 private void evaluatePredicate(int axisOrdinal) { 543 if (axisOrdinal == arity) { 544 // If the flush predicate evaluates to false for this cell, 545 // and this cell currently has some data (*), 546 // then none of the values which are the coordinates of this 547 // cell can be discarded. 548 // 549 // * Important when there is sparsity. Consider the cell 550 // {year=1997, quarter=Q1, month=12}. This cell would never have 551 // data, so there's no point keeping it. 552 if (!flushPredicate.evaluate(valueList)) { 553 // REVIEW: getObject forces an int or double dataset to 554 // create a boxed object; use exists() instead? 555 if (data.getObject(cellKey) != null) { 556 for (int k = 0; k < arity; k++) { 557 keepBitSets[k].set(ordinals[k]); 558 } 559 } 560 } 561 } else { 562 final SegmentAxis axis = axes[axisOrdinal]; 563 if (axis == null) { 564 evaluatePredicate(axisOrdinal + 1); 565 } else { 566 final Comparable[] keys = axis.getKeys(); 567 for (int keyOrdinal = 0; 568 keyOrdinal < keys.length; 569 keyOrdinal++) 570 { 571 Object key = keys[keyOrdinal]; 572 values[axisOrdinal] = key; 573 ordinals[axisOrdinal] = keyOrdinal; 574 cellKey.setAxis( 575 axisInverseOrdinals[axisOrdinal], 576 keyOrdinal); 577 evaluatePredicate(axisOrdinal + 1); 578 } 579 } 580 } 581 } 582 } 583 584 private static class ConstraintComparator implements Comparator<Integer> { 585 private final double[] bloats; 586 587 ConstraintComparator(double[] bloats) { 588 this.bloats = bloats; 589 } 590 591 // implement Comparator 592 // order by bloat descending 593 public int compare(Integer o0, Integer o1) { 594 double bloat0 = bloats[o0]; 595 double bloat1 = bloats[o1]; 596 return (bloat0 == bloat1) 597 ? 0 598 : (bloat0 < bloat1) 599 ? 1 600 : -1; 601 } 602 } 603 604 605} 606 607// End Aggregation.java