001/*
002// This software is subject to the terms of the Eclipse Public License v1.0
003// Agreement, available at the following URL:
004// http://www.eclipse.org/legal/epl-v10.html.
005// You must accept the terms of that agreement to use this software.
006//
007// Copyright (C) 2001-2005 Julian Hyde
008// Copyright (C) 2005-2012 Pentaho and others
009// All Rights Reserved.
010//
011// jhyde, 28 August, 2001
012*/
013package mondrian.rolap.agg;
014
015import mondrian.olap.*;
016import mondrian.rolap.*;
017
018import java.util.*;
019import java.util.concurrent.Future;
020
021/**
022 * A <code>Aggregation</code> is a pre-computed aggregation over a set of
023 * columns.
024 *
025 * <p>Rollup operations:<ul>
026 * <li>drop an unrestricted column (e.g. state=*)</li>
027 * <li>tighten any restriction (e.g. year={1997,1998} becomes
028 * year={1997})</li>
029 * <li>restrict an unrestricted column (e.g. year=* becomes
030 * year={1997})</li>
031 * </ul>
032 *
033 * <p>Representation of aggregations. Sparse and dense representations are
034 * necessary for different data sets. Should adapt automatically. Use an
035 * interface to hold the data set, so the segment doesn't care.</p>
036 *
037 * Suppose we have a segment {year=1997, quarter={1,2,3},
038 * state={CA,WA}}. We want to roll up to a segment for {year=1997,
039 * state={CA,WA}}.  We need to know that we have all quarters.  We don't.
040 * Because year and quarter are independent, we know that we have all of
041 * the ...</p>
042 *
043 * <p>Suppose we have a segment specified by {region=West, state=*,
044 * year=*}, which materializes to ({West}, {CA,WA,OR}, {1997,1998}).
045 * Because state=*, we can rollup to {region=West, year=*} or {region=West,
046 * year=1997}.</p>
047 *
048 * <p>The space required for a segment depends upon the dimensionality (d),
049 * cell count (c) and the value count (v). We don't count the space
050 * required for the actual values, which is the same in any scheme.</p>
051 *
052 * @author jhyde
053 * @since 28 August, 2001
054 */
055public class Aggregation {
056
057    private final List<StarPredicate> compoundPredicateList;
058    private final RolapStar star;
059    private final BitKey constrainedColumnsBitKey;
060
061    /**
062     * Setting for optimizing SQL predicates.
063     */
064    private final int maxConstraints;
065
066    /**
067     * Timestamp of when the aggregation was created. (We use
068     * {@link java.util.Date} rather than {@link java.sql.Timestamp} because it
069     * has less baggage.)
070     */
071    private final Date creationTimestamp;
072
073    /**
074     * Creates an Aggregation.
075     *
076     * @param aggregationKey the key specifying the axes, the context and
077     *                       the RolapStar for this Aggregation
078     */
079    public Aggregation(
080        AggregationKey aggregationKey)
081    {
082        this.compoundPredicateList = aggregationKey.getCompoundPredicateList();
083        this.star = aggregationKey.getStar();
084        this.constrainedColumnsBitKey =
085            aggregationKey.getConstrainedColumnsBitKey();
086        this.maxConstraints =
087            MondrianProperties.instance().MaxConstraints.get();
088        this.creationTimestamp = new Date();
089    }
090
091    /**
092     * @return Returns the timestamp when the aggregation was created
093     */
094    public Date getCreationTimestamp() {
095        return creationTimestamp;
096    }
097
098    /**
099     * Loads a set of segments into this aggregation, one per measure,
100     * each constrained by the same set of column values, and each pinned
101     * once.
102     *
103     * <p>A Column and its constraints are accessed at the same level in their
104     * respective arrays.
105     *
106     * <p>For example,
107     * <blockquote><pre>
108     * measures = {unit_sales, store_sales},
109     * state = {CA, OR},
110     * gender = unconstrained</pre></blockquote>
111     *
112     * @param segmentFutures List of futures wherein each statement will place
113     *                       a list of the segments it has loaded, when it
114     *                       completes
115     */
116    public void load(
117        SegmentCacheManager cacheMgr,
118        int cellRequestCount,
119        RolapStar.Column[] columns,
120        List<RolapStar.Measure> measures,
121        StarColumnPredicate[] predicates,
122        GroupingSetsCollector groupingSetsCollector,
123        List<Future<Map<Segment, SegmentWithData>>> segmentFutures)
124    {
125        BitKey measureBitKey = getConstrainedColumnsBitKey().emptyCopy();
126        int axisCount = columns.length;
127        Util.assertTrue(predicates.length == axisCount);
128
129        List<Segment> segments =
130            createSegments(
131                columns, measures, measureBitKey, predicates);
132
133        // The constrained columns are simply the level and foreign columns
134        BitKey levelBitKey = getConstrainedColumnsBitKey();
135        GroupingSet groupingSet =
136            new GroupingSet(
137                segments, levelBitKey, measureBitKey, predicates, columns);
138        if (groupingSetsCollector.useGroupingSets()) {
139            groupingSetsCollector.add(groupingSet);
140        } else {
141            final SegmentLoader segmentLoader = new SegmentLoader(cacheMgr);
142            segmentLoader.load(
143                cellRequestCount,
144                new ArrayList<GroupingSet>(
145                    Collections.singletonList(groupingSet)),
146                compoundPredicateList,
147                segmentFutures);
148        }
149    }
150
151    private List<Segment> createSegments(
152        RolapStar.Column[] columns,
153        List<RolapStar.Measure> measures,
154        BitKey measureBitKey,
155        StarColumnPredicate[] predicates)
156    {
157        List<Segment> segments = new ArrayList<Segment>(measures.size());
158        for (RolapStar.Measure measure : measures) {
159            measureBitKey.set(measure.getBitPosition());
160            Segment segment =
161                new Segment(
162                    star,
163                    constrainedColumnsBitKey,
164                    columns,
165                    measure,
166                    predicates,
167                    Collections.<Segment.ExcludedRegion>emptyList(),
168                    compoundPredicateList);
169            segments.add(segment);
170        }
171        // It is important to sort the segments per measure bitkey.
172        // The order in which the measures come in is not deterministic.
173        // It actually depends on the order of the CellRequests.
174        // See: mondrian.rolap.BatchLoader.Batch.add(CellRequest request).
175        // Failure to sort them will give out wrong results (uses the wrong
176        // column) if we have more than one column in the grouping set.
177        Collections.sort(
178            segments, new Comparator<Segment>() {
179                public int compare(Segment o1, Segment o2) {
180                    return Integer.valueOf(
181                        o1.measure.getBitPosition())
182                            .compareTo(o2.measure.getBitPosition());
183                }
184            });
185        return segments;
186    }
187
188    /**
189     * Drops predicates, where the list of values is close to the values which
190     * would be returned anyway.
191     */
192    public StarColumnPredicate[] optimizePredicates(
193        RolapStar.Column[] columns,
194        StarColumnPredicate[] predicates)
195    {
196        RolapStar star = getStar();
197        Util.assertTrue(predicates.length == columns.length);
198        StarColumnPredicate[] newPredicates = predicates.clone();
199        double[] bloats = new double[columns.length];
200
201        // We want to handle the special case "drilldown" which occurs pretty
202        // often. Here, the parent is here as a constraint with a single member
203        // and the list of children as well.
204        List<Member> potentialParents = new ArrayList<Member>();
205        for (final StarColumnPredicate predicate : predicates) {
206            Member m;
207            if (predicate instanceof MemberColumnPredicate) {
208                m = ((MemberColumnPredicate) predicate).getMember();
209                potentialParents.add(m);
210            }
211        }
212
213        for (int i = 0; i < newPredicates.length; i++) {
214            // A set of constraints with only one entry will not be optimized
215            // away
216            if (!(newPredicates[i] instanceof ListColumnPredicate)) {
217                bloats[i] = 0.0;
218                continue;
219            }
220
221            final ListColumnPredicate newPredicate =
222                (ListColumnPredicate) newPredicates[i];
223            final List<StarColumnPredicate> predicateList =
224                newPredicate.getPredicates();
225            final int valueCount = predicateList.size();
226            if (valueCount < 2) {
227                bloats[i] = 0.0;
228                continue;
229            }
230
231            if (valueCount > maxConstraints) {
232                // Some databases can handle only a limited number of elements
233                // in 'WHERE IN (...)'. This set is greater than this database
234                // can handle, so we drop this constraint. Hopefully there are
235                // other constraints that will limit the result.
236                bloats[i] = 1.0; // will be optimized away
237                continue;
238            }
239
240            // more than one - check for children of same parent
241            double constraintLength = (double) valueCount;
242            Member parent = null;
243            Level level = null;
244            for (int j = 0; j < valueCount; j++) {
245                Object value = predicateList.get(j);
246                if (value instanceof MemberColumnPredicate) {
247                    MemberColumnPredicate memberColumnPredicate =
248                        (MemberColumnPredicate) value;
249                    Member m = memberColumnPredicate.getMember();
250                    if (j == 0) {
251                        parent = m.getParentMember();
252                        level = m.getLevel();
253                    } else {
254                        if (parent != null
255                            && !parent.equals(m.getParentMember()))
256                        {
257                            parent = null; // no common parent
258                        }
259                        if (level != null
260                            && !level.equals(m.getLevel()))
261                        {
262                            // should never occur, constraints are of same level
263                            level = null;
264                        }
265                    }
266                } else {
267                    // Value constraint with no associated member.
268                    // Compute bloat by #constraints / column cardinality.
269                    parent = null;
270                    level = null;
271                    bloats[i] = constraintLength / columns[i].getCardinality();
272                    break;
273                }
274            }
275            boolean done = false;
276            if (parent != null) {
277                // common parent exists
278                if (parent.isAll() || potentialParents.contains(parent)) {
279                    // common parent is there as constraint
280                    //  if the children are complete, this constraint set is
281                    //  unneccessary try to get the children directly from
282                    //  cache for the drilldown case, the children will be
283                    //  in the cache
284                    // - if not, forget this optimization.
285                    SchemaReader scr = star.getSchema().getSchemaReader();
286                    int childCount = scr.getChildrenCountFromCache(parent);
287                    if (childCount == -1) {
288                        // nothing gotten from cache
289                        if (!parent.isAll()) {
290                            // parent is in constraints
291                            // no information about children cardinality
292                            //  constraints must not be optimized away
293                            bloats[i] = 0.0;
294                            done = true;
295                        }
296                    } else {
297                        bloats[i] = constraintLength / childCount;
298                        done = true;
299                    }
300                }
301            }
302
303            if (!done && level != null) {
304                // if the level members are cached, we do not need "count *"
305                SchemaReader scr = star.getSchema().getSchemaReader();
306                int memberCount = scr.getLevelCardinality(level, true, false);
307                if (memberCount > 0) {
308                    bloats[i] = constraintLength / memberCount;
309                    done = true;
310                }
311            }
312
313            if (!done) {
314                bloats[i] = constraintLength / columns[i].getCardinality();
315            }
316        }
317
318        // build a list of constraints sorted by 'bloat factor'
319        ConstraintComparator comparator = new ConstraintComparator(bloats);
320        Integer[] indexes = new Integer[columns.length];
321        for (int i = 0; i < columns.length; i++) {
322            indexes[i] = i;
323        }
324
325        // sort indexes by bloat descending
326        Arrays.sort(indexes, comparator);
327
328        // Eliminate constraints one by one, until the constrained cell count
329        // became half of the unconstrained cell count. We can not have an
330        // absolute value here, because its
331        // very different if we fetch data for 2 years or 10 years (5 times
332        // more means 5 times slower). So a relative comparison is ok here
333        // but not an absolute one.
334
335        double abloat = 1.0;
336        final double aBloatLimit = .5;
337
338        for (Integer j : indexes) {
339            abloat = abloat * bloats[j];
340            if (abloat <= aBloatLimit) {
341                break;
342            }
343            // eliminate this constraint
344            if (MondrianProperties.instance().OptimizePredicates.get()
345                || bloats[j] == 1)
346            {
347                newPredicates[j] = new LiteralStarPredicate(columns[j], true);
348            }
349        }
350
351        // Now do simple structural optimizations, e.g. convert a list predicate
352        // with one element to a value predicate.
353        for (int i = 0; i < newPredicates.length; i++) {
354            newPredicates[i] = StarPredicates.optimize(newPredicates[i]);
355        }
356
357        return newPredicates;
358    }
359
360    /**
361     * This is called during SQL generation.
362     */
363    public RolapStar getStar() {
364        return star;
365    }
366
367    /**
368     * Returns the BitKey for ALL columns (Measures and Levels) involved in the
369     * query.
370     */
371    public BitKey getConstrainedColumnsBitKey() {
372        return constrainedColumnsBitKey;
373    }
374
375    // -- classes -------------------------------------------------------------
376
377    /**
378     * Helper class to figure out which axis values evaluate to true at least
379     * once by a given predicate.
380     *
381     * <p>Consider, for example, the flush predicate<blockquote><code>
382     *
383     * member between [Time].[1997].[Q3] and [Time].[1999].[Q1]
384     *
385     * </code></blockquote>applied to the segment <blockquote><code>
386     *
387     * year in (1996, 1997, 1998, 1999)<br/>
388     * quarter in (Q1, Q2, Q3, Q4)
389     *
390     * </code></blockquote> The predicate evaluates to true for the pairs
391     * <blockquote><code>
392     *
393     * {(1997, Q3), (1997, Q4),
394     * (1998, Q1), (1998, Q2), (1998, Q3), (1998, Q4), (1999, Q1)}
395     *
396     * </code></blockquote> and therefore we wish to eliminate these pairs from
397     * the segment. But we can eliminate a value only if <em>all</em> of its
398     * values are eliminated.
399     *
400     * <p>In this case, year=1998 is the only value which can be eliminated from
401     * the segment.
402     */
403    private static class ValuePruner {
404        /**
405         * Multi-column predicate. If the predicate evaluates to true, a cell
406         * will be removed from the segment. But we can only eliminate a value
407         * if all of its cells are eliminated.
408         */
409        private final StarPredicate flushPredicate;
410        /**
411         * Number of columns predicate depends on.
412         */
413        private final int arity;
414        /**
415         * For each column, the segment axis which the column corresponds to, or
416         * null.
417         */
418        private final SegmentAxis[] axes;
419        /**
420         * For each column, a bitmap of values for which the predicate is
421         * sometimes false. These values cannot be eliminated from the axis.
422         */
423        private final BitSet[] keepBitSets;
424        /**
425         * For each segment axis, the predicate column which depends on the
426         * axis, or -1.
427         */
428        private final int[] axisInverseOrdinals;
429        /**
430         * Workspace which contains the current key value for each column.
431         */
432        private final Object[] values;
433        /**
434         * View onto {@link #values} as a list.
435         */
436        private final List<Object> valueList;
437        /**
438         * Workspace which contains the ordinal of the current value of each
439         * column on its axis.
440         */
441        private final int[] ordinals;
442
443        private final SegmentDataset data;
444
445        private final CellKey cellKey;
446
447        /**
448         * Creates a ValuePruner.
449         *
450         * @param flushPredicate Multi-column predicate to test
451         * @param segmentAxes    Axes of the segment. (The columns that the
452         *                       predicate may not be present, or may
453         *                       be in a different order.)
454         * @param data           Segment dataset, which allows pruner
455         *                       to determine whether a particular
456         *                       cell is currently empty
457         */
458        ValuePruner(
459            StarPredicate flushPredicate,
460            SegmentAxis[] segmentAxes,
461            SegmentDataset data)
462        {
463            this.flushPredicate = flushPredicate;
464            this.arity = flushPredicate.getConstrainedColumnList().size();
465            this.axes = new SegmentAxis[arity];
466            this.keepBitSets = new BitSet[arity];
467            this.axisInverseOrdinals = new int[segmentAxes.length];
468            Arrays.fill(axisInverseOrdinals, -1);
469            this.values = new Object[arity];
470            this.valueList = Arrays.asList(values);
471            this.ordinals = new int[arity];
472            assert data != null;
473            this.data = data;
474            this.cellKey = CellKey.Generator.newCellKey(segmentAxes.length);
475
476            // Pair up constraint columns with axes. If one of the constraint's
477            // columns is not in this segment, it gets the null axis. The
478            // constraint will have to evaluate to true for all possible values
479            // of that column.
480            for (int i = 0; i < arity; i++) {
481                RolapStar.Column column =
482                    flushPredicate.getConstrainedColumnList().get(i);
483                int axisOrdinal =
484                    findAxis(segmentAxes, column.getBitPosition());
485                if (axisOrdinal < 0) {
486                    this.axes[i] = null;
487                    values[i] = StarPredicate.WILDCARD;
488                    keepBitSets[i] = new BitSet(1); // dummy
489                } else {
490                    axes[i] = segmentAxes[axisOrdinal];
491                    axisInverseOrdinals[axisOrdinal] = i;
492                    final int keyCount = axes[i].getKeys().length;
493                    keepBitSets[i] = new BitSet(keyCount);
494                }
495            }
496        }
497
498        private int findAxis(SegmentAxis[] axes, int bitPosition) {
499            for (int i = 0; i < axes.length; i++) {
500                SegmentAxis axis = axes[i];
501                if (axis.getPredicate().getConstrainedColumn().getBitPosition()
502                    == bitPosition)
503                {
504                    return i;
505                }
506            }
507            return -1;
508        }
509
510        /**
511         * Applies this ValuePruner's predicate and sets bits in axisBitSets
512         * to indicate extra values which can be removed.
513         *
514         * @param axisKeepBitSets Array containing, for each axis, a bitset
515         *                        of values to keep (not flush)
516         */
517        void go(BitSet[] axisKeepBitSets) {
518            evaluatePredicate(0);
519
520            // Clear bits in the axis bit sets (indicating that a value is never
521            // used) if this predicate evaluates to true for every combination
522            // of values which this axis value appears in.
523            for (int i = 0; i < axisKeepBitSets.length; i++) {
524                if (axisInverseOrdinals[i] < 0) {
525                    continue;
526                }
527                BitSet axisKeepBitSet = axisKeepBitSets[axisInverseOrdinals[i]];
528                final BitSet keepBitSet = keepBitSets[i];
529                axisKeepBitSet.and(keepBitSet);
530            }
531        }
532
533        /**
534         * Evaluates the predicate for axes <code>i</code> and higher, and marks
535         * {@link #keepBitSets} if the predicate ever evaluates to false.
536         * The result is that discardBitSets[i] will be false for column #i if
537         * the predicate evaluates to true for all cells in the segment which
538         * have that column value.
539         *
540         * @param axisOrdinal Axis ordinal
541         */
542        private void evaluatePredicate(int axisOrdinal) {
543            if (axisOrdinal == arity) {
544                // If the flush predicate evaluates to false for this cell,
545                // and this cell currently has some data (*),
546                // then none of the values which are the coordinates of this
547                // cell can be discarded.
548                //
549                // * Important when there is sparsity. Consider the cell
550                // {year=1997, quarter=Q1, month=12}. This cell would never have
551                // data, so there's no point keeping it.
552                if (!flushPredicate.evaluate(valueList)) {
553                    // REVIEW: getObject forces an int or double dataset to
554                    // create a boxed object; use exists() instead?
555                    if (data.getObject(cellKey) != null) {
556                        for (int k = 0; k < arity; k++) {
557                            keepBitSets[k].set(ordinals[k]);
558                        }
559                    }
560                }
561            } else {
562                final SegmentAxis axis = axes[axisOrdinal];
563                if (axis == null) {
564                    evaluatePredicate(axisOrdinal + 1);
565                } else {
566                    final Comparable[] keys = axis.getKeys();
567                    for (int keyOrdinal = 0;
568                        keyOrdinal < keys.length;
569                        keyOrdinal++)
570                    {
571                        Object key = keys[keyOrdinal];
572                        values[axisOrdinal] = key;
573                        ordinals[axisOrdinal] = keyOrdinal;
574                        cellKey.setAxis(
575                            axisInverseOrdinals[axisOrdinal],
576                            keyOrdinal);
577                        evaluatePredicate(axisOrdinal + 1);
578                    }
579                }
580            }
581        }
582    }
583
584    private static class ConstraintComparator implements Comparator<Integer> {
585        private final double[] bloats;
586
587        ConstraintComparator(double[] bloats) {
588            this.bloats = bloats;
589        }
590
591        // implement Comparator
592        // order by bloat descending
593        public int compare(Integer o0, Integer o1) {
594            double bloat0 = bloats[o0];
595            double bloat1 = bloats[o1];
596            return (bloat0 == bloat1)
597                ? 0
598                : (bloat0 < bloat1)
599                    ? 1
600                    : -1;
601        }
602    }
603
604
605}
606
607// End Aggregation.java