-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#171 Removing guava dependency and adding bloom filter.
- Loading branch information
1 parent
e42391b
commit 2150d10
Showing
12 changed files
with
94 additions
and
61 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
73 changes: 73 additions & 0 deletions
73
phileas-model/src/main/java/ai/philterd/phileas/model/utils/BloomFilter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package ai.philterd.phileas.model.utils; | ||
|
||
import org.apache.commons.codec.digest.MurmurHash3; | ||
|
||
import java.nio.charset.StandardCharsets; | ||
import java.security.MessageDigest; | ||
import java.security.NoSuchAlgorithmException; | ||
import java.util.BitSet; | ||
import java.util.function.Function; | ||
|
||
import static org.apache.commons.codec.digest.MurmurHash3.DEFAULT_SEED; | ||
|
||
public class BloomFilter<T> { | ||
|
||
private final BitSet bitSet; | ||
private final Function<T, Integer>[] hashFunctions; | ||
|
||
public BloomFilter(int size) { | ||
this.bitSet = new BitSet(size); | ||
this.hashFunctions = createHashFunctions(); | ||
} | ||
|
||
public void put(T element) { | ||
for (final Function<T, Integer> hashFunction : hashFunctions) { | ||
int hash = hashFunction.apply(element); | ||
bitSet.set(Math.abs(hash) % bitSet.size(), true); | ||
} | ||
} | ||
|
||
public boolean mightContain(T element) { | ||
for (final Function<T, Integer> hashFunction : hashFunctions) { | ||
int hash = hashFunction.apply(element); | ||
if (!bitSet.get(Math.abs(hash) % bitSet.size())) { | ||
return false; | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
private Function<T, Integer>[] createHashFunctions() { | ||
|
||
Function<T, Integer>[] functions = new Function[2]; | ||
|
||
functions[0] = (T element) -> { | ||
final byte[] data = element.toString().getBytes(StandardCharsets.UTF_8); | ||
return MurmurHash3.hash32x86(data, 0, data.length, DEFAULT_SEED); | ||
}; | ||
|
||
functions[1] = (T element) -> { | ||
|
||
try { | ||
|
||
final MessageDigest digest = MessageDigest.getInstance("MD5"); | ||
byte[] hash = digest.digest(element.toString().getBytes(StandardCharsets.UTF_8)); | ||
|
||
int hashCode = 0; | ||
for (int i = 0; i < 4; i++) { | ||
hashCode = (hashCode << 8) | (hash[i] & 0xFF); | ||
} | ||
|
||
return hashCode; | ||
|
||
} catch (NoSuchAlgorithmException e) { | ||
throw new RuntimeException("MD5 algorithm not found", e); | ||
} | ||
|
||
}; | ||
|
||
return functions; | ||
|
||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters