本文共 14070 字,大约阅读时间需要 46 分钟。
wikipedia的描述:A hash function is any function that can be used to map data of arbitrary size to fixed-size values. The values returned by a hash function are called hash values, hash codes, digests, or simply hashes. The values are usually used to index a fixed-size table called a hash table.
哈希函数是指可以将任意大小的数据映射到固定大小的值的任意函数,其返回值成为哈希值,哈希码,摘要或哈希。这些值通常用于索引有固定大小的表,这种表称为哈希表。
数据索引查询
我们知道数组根据索引查询的方式很快,时间复杂度为O(1), 常数量级别。那能不能将无序数据转化成数组索引查询呢? 还真有,那就要介绍的哈希表。
能不能按照内容查询,不通过比较,通过计算得到地址? 根据给定值计算存储位置,记录的存储位置与该记录的关键字确定某种对应关系。
主要有三步骤:
计算哈希码,调用hashCode(), 得到一个int值;
计算在哈希表的存储位置:y=k(x)=x%11.(这里是除留取余法,实际上有种方法)
y:哈希表存储位置,k(x):哈希函数,x:哈希码。存入哈希表
例子:
结论1:添加数据快
结论2:唯一,无序查询跟添加过程差不多。计算哈希码,找到存储位置。
@Override public int hashCode() { return Integer.hashCode(value); } public static int hashCode(int value) { return value; }
Integer的哈希码就是自身。
@Override public int hashCode() { return Double.hashCode(value); } public static int hashCode(double value) { long bits = doubleToLongBits(value); return (int)(bits ^ (bits >>> 32)); }
public int hashCode() { int h = hash; if (h == 0 && value.length > 0) { char val[] = value; for (int i = 0; i < value.length; i++) { h = 31 * h + val[i]; } hash = h; } return h;}
@Override public int hashCode() { return Boolean.hashCode(value); } public static int hashCode(boolean value) { return value ? 1231 : 1237; }
@Override public int hashCode() { return Long.hashCode(value); } public static int hashCode(long value) { return (int)(value ^ (value >>> 32)); }
public class User { @TableId(type = IdType.ASSIGN_ID) private Long id; private String name; private Integer age; private String email; @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } User user = (User) o; return Objects.equals(id, user.id) && Objects.equals(name, user.name) && Objects.equals(age, user.age) && Objects.equals(email, user.email); } @Override public int hashCode() { return Objects.hash(id, name, age, email); }...gettter setter
// 底层代码,调用字段的hashCode()方法public static int hashCode(Object a[]) { if (a == null) return 0; int result = 1; for (Object element : a) result = 31 * result + (element == null ? 0 : element.hashCode()); return result; }
如果你有10万条数据,但是你的表长度只有100,显然冲突不可避免;如果你有100条数据,但是你的表长度10万,显然很浪费。这里需要一个度,引入一个因子。
加载因子:表记录数/哈希表长度 4/16=0.25.
java中的Map取0.75。/** * The load factor used when none specified in constructor. */ static final float DEFAULT_LOAD_FACTOR = 0.75f;
static class Entryimplements Map.Entry { final K key; V value; Entry next; // 下个节点的指针 int hash; // key的哈希码 ....... }
// 表初始大小16static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16// 哈希表最大长度static final int MAXIMUM_CAPACITY = 1 << 30;// 装填因子static final float DEFAULT_LOAD_FACTOR = 0.75f;// 哈希表 表长度必须是2的倍数transient Entry[] table = (Entry []) EMPTY_TABLE;// 数据存储的数量transient int size;// 扩容的阈值int threshold;
HashMapmap = new HashMap<>();public HashMap() { // 默认大小为16,装填因子为0.75 this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR); } public HashMap(int initialCapacity, float loadFactor) { if (initialCapacity < 0) throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity); if (initialCapacity > MAXIMUM_CAPACITY) initialCapacity = MAXIMUM_CAPACITY; if (loadFactor <= 0 || Float.isNaN(loadFactor)) throw new IllegalArgumentException("Illegal load factor: " + loadFactor); // 设置装填因子 和 扩容阈值 注意这里并没有初始化哈希表的大小 this.loadFactor = loadFactor; threshold = initialCapacity; init(); }
public V put(K key, V value) { // 如果哈希表为空表,则根据阈值初始化表 if (table == EMPTY_TABLE) { inflateTable(threshold); } // 如果key为null,则在索引为0的位置插入数据 if (key == null) return putForNullKey(value); // 计算哈希值 int hash = hash(key); // 根据哈希值计算索引 int i = indexFor(hash, table.length); // 判断索引位置的链表是否有相同的数据,如果有,则有新数据替代旧数据 for (Entrye = table[i]; e != null; e = e.next) { Object k; if (e.hash == hash && ((k = e.key) == key || key.equals(k))) { V oldValue = e.value; e.value = value; e.recordAccess(this); return oldValue; } } modCount++; // 索引位置为空或者链表都没有key相同的值,则直接在索引位置插入数据 addEntry(hash, key, value, i); return null; }
private void inflateTable(int toSize) { // 找到>=toSize且是2的倍数 int capacity = roundUpToPowerOf2(toSize); threshold = (int) Math.min(capacity * loadFactor, MAXIMUM_CAPACITY + 1); // 初始化表的大小 table = new Entry[capacity]; initHashSeedAsNeeded(capacity); }
private V putForNullKey(V value) { // 在索引位置为0的位置找key为null的数据,如果有则用新值替换旧值 for (Entrye = table[0]; e != null; e = e.next) { if (e.key == null) { V oldValue = e.value; e.value = value; e.recordAccess(this); return oldValue; } } modCount++; // 如果索引为空或链表均没有key为null的值,则直接插入 addEntry(0, null, value, 0); return null; }
void addEntry(int hash, K key, V value, int bucketIndex) { // 当数据量大于扩容阈值并且索引位置不为空,则扩容两倍 if ((size >= threshold) && (null != table[bucketIndex])) { // 扩容两倍 resize(2 * table.length); hash = (null != key) ? hash(key) : 0; bucketIndex = indexFor(hash, table.length); } // 创建新节点 createEntry(hash, key, value, bucketIndex); }
void resize(int newCapacity) { Entry[] oldTable = table; int oldCapacity = oldTable.length; if (oldCapacity == MAXIMUM_CAPACITY) { threshold = Integer.MAX_VALUE; return; } // 新建哈希表 Entry[] newTable = new Entry[newCapacity]; // 将旧表的数据复制到新表,这就是为什么初始化map尽量指定大小 transfer(newTable, initHashSeedAsNeeded(newCapacity)); table = newTable; threshold = (int)Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1); }
// 将旧表数据转移到新表,会将旧表索引位置的链表数据重新一个个计算新索引值void transfer(Entry[] newTable, boolean rehash) { int newCapacity = newTable.length; for (Entrye : table) { while(null != e) { Entry next = e.next; if (rehash) { e.hash = null == e.key ? 0 : hash(e.key); } int i = indexFor(e.hash, newCapacity); e.next = newTable[i]; newTable[i] = e; e = next; } } }
// 索引位置的数据作为next,新数据直接插入索引位置void createEntry(int hash, K key, V value, int bucketIndex) { Entrye = table[bucketIndex]; table[bucketIndex] = new Entry<>(hash, key, value, e); size++; }
// 计算哈希值final int hash(Object k) { int h = hashSeed; if (0 != h && k instanceof String) { return sun.misc.Hashing.stringHash32((String) k); } h ^= k.hashCode(); // This function ensures that hashCodes that differ only by // constant multiples at each bit position have a bounded // number of collisions (approximately 8 at default load factor). h ^= (h >>> 20) ^ (h >>> 12); return h ^ (h >>> 7) ^ (h >>> 4); }
// 计算索引位置,这边是java 与的计算,并不是除留取余法 static int indexFor(int h, int length) { // assert Integer.bitCount(length) == 1 : "length must be a non-zero power of 2"; return h & (length-1); }
public V get(Object key) { // 如果key为null, if (key == null) return getForNullKey(); // 查找数据 Entryentry = getEntry(key); // 返回value值 return null == entry ? null : entry.getValue(); }
private V getForNullKey() { if (size == 0) { return null; } // 索引为0的位置,查找key为null的数据 for (Entrye = table[0]; e != null; e = e.next) { if (e.key == null) return e.value; } return null; }
final EntrygetEntry(Object key) { if (size == 0) { return null; } // 计算哈希值 int hash = (key == null) ? 0 : hash(key); // indexFor(hash, table.length) 计算索引位置 for (Entry e = table[indexFor(hash, table.length)]; e != null; e = e.next) { Object k; // 比较哈希值和key,如果都相等,则返回数据 if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) return e; } return null;}
static class Nodeimplements Map.Entry { final int hash; final K key; V value; Node next; .......}
// 表初始大小16static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16// 哈希表最大长度static final int MAXIMUM_CAPACITY = 1 << 30;// 装填因子static final float DEFAULT_LOAD_FACTOR = 0.75f;// 链表转化为红黑树的阈值static final int TREEIFY_THRESHOLD = 8;// 红黑树转为链表的阈值static final int UNTREEIFY_THRESHOLD = 6;// 最小树形化阈值,当哈希表的容量大于此值时,才允许链表转为红黑树,否则直接扩容static final int MIN_TREEIFY_CAPACITY = 64;// 哈希表 表长度必须是2的倍数,第一次使用才初始化transient Node[] table;// 数据存储的数量transient int size;// 扩容的阈值int threshold;
HashMapmap = new HashMap<>();public HashMap() { // 加载因子 this.loadFactor = DEFAULT_LOAD_FACTOR; // all other fields defaulted}public HashMap(int initialCapacity, float loadFactor) { if (initialCapacity < 0) throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity); if (initialCapacity > MAXIMUM_CAPACITY) initialCapacity = MAXIMUM_CAPACITY; if (loadFactor <= 0 || Float.isNaN(loadFactor)) throw new IllegalArgumentException("Illegal load factor: " + loadFactor); this.loadFactor = loadFactor; this.threshold = tableSizeFor(initialCapacity);}
public V put(K key, V value) { //hash(key) 计算哈希值 return putVal(hash(key), key, value, false, true); }
static final int hash(Object key) { // 计算哈希值 int h; return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16); }
final V putVal(int hash, K key, V value, boolean onlyIfAbsent, boolean evict) { Node[] tab; Node p; int n, i; // 哈希表为null或长度为0,初始化哈希表 if ((tab = table) == null || (n = tab.length) == 0) // resize() 当哈希表为null是,初始化表 n = (tab = resize()).length; // i = (n - 1) & hash] 计算索引位置,如果索引i位置无数据,则在该位置插入数据 if ((p = tab[i = (n - 1) & hash]) == null) // 插入新值 tab[i] = newNode(hash, key, value, null); else { Node e; K k; // 如果哈希码和key相等 if (p.hash == hash && ((k = p.key) == key || (key != null && key.equals(k)))) e = p; // 如果是树节点 else if (p instanceof TreeNode) e = ((TreeNode )p).putTreeVal(this, tab, hash, key, value); else { for (int binCount = 0; ; ++binCount) { if ((e = p.next) == null) { p.next = newNode(hash, key, value, null); if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st treeifyBin(tab, hash); break; } if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) break; p = e; } } // 用新值替换旧值 if (e != null) { // existing mapping for key V oldValue = e.value; if (!onlyIfAbsent || oldValue == null) e.value = value; afterNodeAccess(e); return oldValue; } } ++modCount; // 如果哈希表容量大于扩容阈值,双倍扩容 if (++size > threshold) resize(); afterNodeInsertion(evict); return null; }
NodenewNode(int hash, K key, V value, Node next) { return new Node<>(hash, key, value, next);}
public V get(Object key) { Nodee; return (e = getNode(hash(key), key)) == null ? null : e.value;}
final NodegetNode(int hash, Object key) { Node [] tab; Node first, e; int n; K k; if ((tab = table) != null && (n = tab.length) > 0 && // n - 1) & hash 计算索引位置 (first = tab[(n - 1) & hash]) != null) { // 如果索引位置的数据哈希码和key都相同,直接返回 if (first.hash == hash && // always check first node ((k = first.key) == key || (key != null && key.equals(k)))) return first; if ((e = first.next) != null) { // 如果是树节点 if (first instanceof TreeNode) return ((TreeNode )first).getTreeNode(hash, key); // 如果不是树节点,则查链表 do { if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) return e; } while ((e = e.next) != null); } } return null; }
转载地址:http://bthwz.baihongyu.com/