001/* 002// This software is subject to the terms of the Eclipse Public License v1.0 003// Agreement, available at the following URL: 004// http://www.eclipse.org/legal/epl-v10.html. 005// You must accept the terms of that agreement to use this software. 006// 007// Copyright (C) 2007-2009 Pentaho and others 008// All Rights Reserved. 009*/ 010 011// Copyright (c) 1999-2007 CERN - European Organization for Nuclear Research. 012// Permission to use, copy, modify, distribute and sell this software 013// and its documentation for any purpose is hereby granted without fee, 014// provided that the above copyright notice appear in all copies and 015// that both that copyright notice and this permission notice appear in 016// supporting documentation. CERN makes no representations about the 017// suitability of this software for any purpose. It is provided "as is" 018// without expressed or implied warranty. 019 020// Created from package cern.colt.map by Richard Emberson, 2007/1/23. 021// For the source to the Colt project, go to: 022// http://dsd.lbl.gov/~hoschek/colt/ 023package mondrian.util; 024 025import java.io.PrintWriter; 026import java.util.Arrays; 027 028/** 029 * Not of interest for users; only for implementors of hashtables. 030 * Used to keep hash table capacities prime numbers. 031 * 032 * <p> 033 * Choosing prime numbers as hash table capacities is a good idea to keep 034 * them working fast, particularly under hash table expansions. 035 * <p> 036 * However, JDK 1.2, JGL 3.1 and many other toolkits do nothing to 037 * keep capacities prime. 038 * This class provides efficient means to choose prime capacities. 039 * <p> 040 * Choosing a prime is <tt>O(log 300)</tt> (binary search in a list of 041 * 300 int's). 042 * Memory requirements: 1 KB static memory. 043 * 044 * @author wolfgang.hoschek@cern.ch 045 * @version 1.0, 09/24/99 046 */ 047final class PrimeFinder { 048 /** 049 * The largest prime this class can generate; currently equal to 050 * <tt>Integer.MAX_VALUE</tt>. 051 * yes, it is prime. 052 */ 053 public static final int largestPrime = Integer.MAX_VALUE; 054 055 /** 056 * The prime number list consists of 11 chunks. 057 * Each chunk contains prime numbers. 058 * A chunk starts with a prime P1. The next element is a prime P2. P2 059 * is the smallest prime for which holds: P2 >= 2*P1. 060 * The next element is P3, for which the same holds with respect to P2, 061 * and so on. 062 * 063 * Chunks are chosen such that for any desired capacity >= 1000 064 * the list includes a prime number <= desired capacity * 1.11 (11%). 065 * For any desired capacity >= 200 066 * the list includes a prime number <= desired capacity * 1.16 (16%). 067 * For any desired capacity >= 16 068 * the list includes a prime number <= desired capacity * 1.21 (21%). 069 * 070 * Therefore, primes can be retrieved which are quite close to any desired 071 * capacity, which in turn avoids wasting memory. 072 * For example, the list includes 073 * 1039,1117,1201,1277,1361,1439,1523,1597,1759,1907,2081. 074 * So if you need a prime >= 1040, you will find a prime <= 1040*1.11=1154. 075 * 076 * Chunks are chosen such that they are optimized for a hashtable 077 * growthfactor of 2.0; 078 * If your hashtable has such a growthfactor then, 079 * after initially "rounding to a prime" upon hashtable construction, 080 * it will later expand to prime capacities such that there exist no 081 * better primes. 082 * 083 * In total these are about 32*10=320 numbers -> 1 KB of static memory 084 * needed. 085 * If you are stingy, then delete every second or fourth chunk. 086 */ 087 088 private static final int[] primeCapacities = { 089 //chunk #0 090 largestPrime, 091 092 //chunk #1 093 5, 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853, 25717, 51437, 094 102877, 205759, 411527, 823117, 1646237, 3292489, 6584983, 13169977, 095 26339969, 52679969, 105359939, 210719881, 421439783, 842879579, 096 1685759167, 097 098 //chunk #2 099 433, 877, 1759, 3527, 7057, 14143, 28289, 56591, 113189, 226379, 452759, 100 905551, 1811107, 3622219, 7244441, 14488931, 28977863, 57955739, 101 115911563, 231823147, 463646329, 927292699, 1854585413, 102 103 //chunk #3 104 953, 1907, 3821, 7643, 15287, 30577, 61169, 122347, 244703, 489407, 105 978821, 1957651, 3915341, 7830701, 15661423, 31322867, 62645741, 106 125291483, 250582987, 501165979, 1002331963, 2004663929, 107 108 //chunk #4 109 1039, 2081, 4177, 8363, 16729, 33461, 66923, 133853, 267713, 535481, 110 1070981, 2141977, 4283963, 8567929, 17135863, 34271747, 68543509, 111 137087021, 274174111, 548348231, 1096696463, 112 113 //chunk #5 114 31, 67, 137, 277, 557, 1117, 2237, 4481, 8963, 17929, 35863, 71741, 115 143483, 286973, 573953, 1147921, 2295859, 4591721, 9183457, 18366923, 116 36733847, 73467739, 146935499, 293871013, 587742049, 1175484103, 117 118 //chunk #6 119 599, 1201, 2411, 4831, 9677, 19373, 38747, 77509, 155027, 310081, 120 620171, 1240361, 2480729, 4961459, 9922933, 19845871, 39691759, 121 79383533, 158767069, 317534141, 635068283, 1270136683, 122 123 //chunk #7 124 311, 631, 1277, 2557, 5119, 10243, 20507, 41017, 82037, 164089, 328213, 125 656429, 1312867, 2625761, 5251529, 10503061, 21006137, 42012281, 126 84024581, 168049163, 336098327, 672196673, 1344393353, 127 128 //chunk #8 129 3, 7, 17, 37, 79, 163, 331, 673, 1361, 2729, 5471, 10949, 21911, 43853, 130 87719, 175447, 350899, 701819, 1403641, 2807303, 5614657, 11229331, 131 22458671, 44917381, 89834777, 179669557, 359339171, 718678369, 132 1437356741, 133 134 //chunk #9 135 43, 89, 179, 359, 719, 1439, 2879, 5779, 11579, 23159, 46327, 92657, 136 185323, 370661, 741337, 1482707, 2965421, 5930887, 11861791, 23723597, 137 47447201, 94894427, 189788857, 379577741, 759155483, 1518310967, 138 139 //chunk #10 140 379, 761, 1523, 3049, 6101, 12203, 24407, 48817, 97649, 195311, 390647, 141 781301, 1562611, 3125257, 6250537, 12501169, 25002389, 50004791, 142 100009607, 200019221, 400038451, 800076929, 1600153859 143 144 /* 145 // some more chunks for the low range [3..1000] 146 //chunk #11 147 13, 29, 59, 127, 257, 521, 1049, 2099, 4201, 8419, 16843, 33703, 67409, 148 134837, 269683, 539389, 1078787, 2157587, 4315183, 8630387, 17260781, 149 34521589, 69043189, 138086407, 276172823, 552345671, 1104691373, 150 151 //chunk #12 152 19, 41, 83, 167, 337, 677, 153 154 //1361, 2729, 5471, 10949, 21911, 43853, 87719, 175447, 350899, 701819, 155 //1403641, 2807303, 5614657, 11229331, 22458671, 44917381, 89834777, 156 //179669557, 359339171, 718678369, 1437356741, 157 158 //chunk #13 159 53, 107, 223, 449, 907, 1823, 3659, 7321, 14653, 29311, 58631, 117269, 160 234539, 469099, 938207, 1876417, 3752839, 7505681, 15011389, 30022781, 161 60045577, 120091177, 240182359, 480364727, 960729461, 1921458943 162 */ 163 }; 164 165 166 static { //initializer 167 // The above prime numbers are formatted for human readability. 168 // To find numbers fast, we sort them once and for all. 169 Arrays.sort(primeCapacities); 170 } 171 172 /** 173 * Makes this class non instantiable. 174 */ 175 private PrimeFinder() { 176 } 177 178 /** 179 * Returns a prime number which is <code>>= desiredCapacity</code> and 180 * very close to <code>desiredCapacity</code> (within 11% if 181 * <code>desiredCapacity >= 1000</code>). 182 * @param desiredCapacity the capacity desired by the user. 183 * @return the capacity which should be used for a hashtable. 184 */ 185 public static int nextPrime(int desiredCapacity) { 186 int i = Arrays.binarySearch(primeCapacities, desiredCapacity); 187 if (i < 0) { 188 // desired capacity not found, choose next prime greater 189 // than desired capacity 190 i = -i -1; // remember the semantics of binarySearch... 191 } 192 return primeCapacities[i]; 193 } 194 195 /** 196 * Tests correctness. Try 197 * from=1000, to=10000 198 * from=200, to=1000 199 * from=16, to=1000 200 * from=1000, to=Integer.MAX_VALUE 201 */ 202 protected static void main(String args[]) { 203 int from = Integer.parseInt(args[0]); 204 int to = Integer.parseInt(args[1]); 205 206 statistics(from, to, new PrintWriter(System.out)); 207 } 208 209 /** 210 * Tests correctness. 211 */ 212 protected static void statistics(int from, int to, PrintWriter pw) { 213 // check that primes contain no accidental errors 214 for (int i = 0; i < primeCapacities.length - 1; i++) { 215 if (primeCapacities[i] >= primeCapacities[i + 1]) { 216 throw new RuntimeException( 217 "primes are unsorted or contain duplicates; detected at " 218 + i + "@" + primeCapacities[i]); 219 } 220 } 221 222 double accDeviation = 0.0; 223 double maxDeviation = - 1.0; 224 225 for (int i = from; i <= to; i++) { 226 int primeCapacity = nextPrime(i); 227 //System.out.println(primeCapacity); 228 double deviation = (primeCapacity - i) / (double)i; 229 230 if (deviation > maxDeviation) { 231 maxDeviation = deviation; 232 pw.println("new maxdev @" + i + "@dev=" + maxDeviation); 233 } 234 235 accDeviation += deviation; 236 } 237 long width = 1 + (long)to - (long)from; 238 239 double meanDeviation = accDeviation / width; 240 pw.println("Statistics for [" + from + "," + to + "] are as follows"); 241 pw.println("meanDeviation = " + (float)meanDeviation * 100 + " %"); 242 pw.println("maxDeviation = " + (float)maxDeviation * 100 + " %"); 243 } 244} 245 246// End PrimeFinder.java