// ItemHashMap.java
package org.troy.capstone.data_structures.item_table;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.troy.capstone.constants.TableColumnName;
import org.troy.capstone.entities.Item;
import org.troy.capstone.interfaces.ItemRepo;
import tech.tablesaw.api.Row;
import tech.tablesaw.api.Table;
/**
 * Custom {@code HashMap} implementation for storing {@code Item} instances with their IDs as
 * keys, using a custom universal hash function ({@code IdHashKey}) to optimize bucket
 * distribution for the specific set of item IDs in our dataset.
 */
public class ItemHashMap extends HashMap<IdHashKey, Item> implements ItemRepo {
    //Main in EntryPoints.java to stop Javadocs from including it

    /** {@code HashMap} is {@code Serializable}; pin an explicit serial version for this subclass. */
    private static final long serialVersionUID = 1L;

    /** Maximum load factor for the {@code ItemHashMap}. */
    private static final float MAX_LOAD_FACTOR = 0.75f;

    /**
     * Table size for the {@code ItemHashMap}, chosen to be a power of 2 for efficient modulo
     * operations, and large enough to maintain a load factor of 0.75 for our expected number of
     * items (961).
     */
    private static final int TABLE_SIZE = 2048;

    /**
     * Creates an {@code ItemHashMap} from a {@code Table}.
     * The map is initialized with the optimal hash parameters for the current item IDs.
     *
     * @pre table is not null and contains the expected columns for creating {@code Item}
     * instances (ID, Name, etc.), and contains no duplicate IDs.
     *
     * @param table A {@code tablesaw Table} containing the item data, with each row representing an item.
     * @return itemMap An {@code ItemHashMap} containing all items from the table, with hash
     * parameters optimized for the item IDs in the table.
     */
    public static ItemHashMap fromTable(Table table) {
        ItemHashMap itemMap = new ItemHashMap(table.rowCount());
        System.out.println("P: " + IdHashKey.getP() + ", I: " + IdHashKey.getI() + ", J: " + IdHashKey.getJ());
        itemMap.addAllItems(table);
        itemMap.printBucketSizeCountsCustomVsBuiltIn();
        return itemMap;
    }

    /**
     * Make initial capacity so that we have just over a 0.75 load factor.
     *
     * @pre data_size is a positive integer.
     *
     * @param data_size The number of {@code Item} instances that will be added to the map,
     * used to get a 0.75 load factor.
     */
    private ItemHashMap(int data_size) {
        //Capacity sized so data_size entries stay at or under MAX_LOAD_FACTOR, avoiding a rehash
        super((int) (data_size / MAX_LOAD_FACTOR) + 1);
    }

    /**
     * Adds an item to the hashmap given a tablesaw Row.
     *
     * @pre {@code itemRow} is not null and contains the expected columns for creating an
     * {@code Item} instance (ID, Name, etc.).
     *
     * @param itemRow A Row from a {@code tablesaw Table} containing item info.
     */
    private void addItem(Row itemRow) {
        String itemId = itemRow.getString(TableColumnName.ID.getColumnName());
        put(new IdHashKey(itemId), Item.fromRow(itemRow));
    }

    /**
     * Adds all rows from a tablesaw Table to the hashmap as Items.
     *
     * @pre {@code table} is not null and contains the expected columns for creating
     * {@code Item} instances (ID, Name, etc.), and contains no duplicate IDs.
     *
     * @param table A {@code tablesaw Table} with each row being an item to add to the map.
     */
    private void addAllItems(Table table) {
        table.stream().forEach(this::addItem);
        System.out.println("Finished adding items. Total items added: " + size());
    }

    /**
     * Retrieves an item from the {@code ItemHashMap} given its ID.
     * Prints a message if the item is not found in the map.
     *
     * @pre {@code itemId} is not null and corresponds to a valid item ID in the map.
     *
     * @param itemId The ID of the item to retrieve.
     * @return An Optional containing the {@code Item} if found, or empty if not found.
     */
    @Override
    public Optional<Item> getItem(String itemId) {
        Optional<Item> item = Optional.ofNullable(get(new IdHashKey(itemId)));
        if (item.isEmpty()) {
            //Message previously printed literal Javadoc markup ("{@code ItemHashMap}") at runtime
            System.out.println("Item with ID " + itemId + " not found in ItemHashMap.");
        }
        return item;
    }

    /**
     * Calculate bucket distribution with current I and J values (fresh hash calculation).
     *
     * @pre {@code itemIds} is not null and contains valid item IDs.
     *
     * @param itemIds A list of item IDs to calculate the bucket distribution for.
     * @param useCustomHash Whether to use the custom {@code IdHashKey} hash function or the
     * built-in {@code String} {@code hashCode}.
     * @return {@code bucketSizeCounts} An array where the value at index N is the number of
     * buckets that have N items in them, according to the specified hash function.
     */
    public int[] getFreshBucketSizeCount(List<String> itemIds, boolean useCustomHash) {
        int[] itemsInBucket = new int[TABLE_SIZE]; //Count of how many items get hashed to each bucket
        for (String id : itemIds) {
            int hash = useCustomHash ? new IdHashKey(id).hashCode() : id.hashCode();
            itemsInBucket[Math.floorMod(hash, TABLE_SIZE)]++; //floorMod keeps negative hashes in range
        }
        //Size the histogram to the largest bucket to avoid ArrayIndexOutOfBoundsException
        int maxBucketSize = Arrays.stream(itemsInBucket).max().orElse(0);
        int[] bucketSizeCounts = new int[maxBucketSize + 1]; //Index N = number of buckets holding N items
        for (int count : itemsInBucket) {
            bucketSizeCounts[count]++;
        }
        return bucketSizeCounts;
    }

    /**
     * Retrieves the keys of the {@code ItemHashMap} as a list of {@code IdHashKey} objects.
     *
     * @return A list of {@code IdHashKey} objects representing the keys in the {@code ItemHashMap}.
     */
    private List<IdHashKey> getKeysAsList() {
        return new ArrayList<>(keySet());
    }

    /**
     * Retrieves the item IDs of the {@code ItemHashMap} as a list of strings.
     *
     * @return A list of strings representing the item IDs in the {@code ItemHashMap}.
     */
    public List<String> getItemIdsAsList() {
        return getKeysAsList().stream()
                .map(IdHashKey::getValue)
                .collect(Collectors.toList());
    }

    /**
     * Retrieves the items of the {@code ItemHashMap} as a list of {@code Item} objects.
     *
     * @return A list of {@code Item} objects representing the values in the {@code ItemHashMap}.
     */
    @Override
    public List<Item> getItemsAsList() {
        return new ArrayList<>(values());
    }

    /**
     * Prints a table comparing the distribution of bucket sizes (number of buckets with 0 items,
     * 1 item, 2 items, etc.) for the custom hash function vs Java's built in {@code String}
     * {@code hashCode}, using the same item IDs.
     * This allows us to see how well our custom universal hash function is performing in terms
     * of distributing items across buckets compared to the built-in hash function.
     *
     * @pre {@code findBestHashParameters()} has already been called to optimize I and J for the
     * current item IDs,
     * the internal state of the {@code ItemHashMap} is not modified between the two distribution
     * calculations (i.e. no items are added or removed),
     * the same item IDs are used for both calculations,
     * and the map must be filled.
     */
    private void printBucketSizeCountsCustomVsBuiltIn() {
        String col1 = "Entries in Bucket (N)", col2 = "Buckets with N entries (Custom Hash)", col3 = "Buckets with N entries (Built-in Hash)";
        System.out.printf("%-" + col1.length() + "s %s %-" + col2.length() + "s %s %-" + col3.length() + "s%n", col1, "|", col2, "|", col3);
        List<String> itemIds = getItemIdsAsList(); //Hoisted: both distributions must use the same ID list
        int[] customBucketSizeCounts = getFreshBucketSizeCount(itemIds, true); //Fresh calculation with current I,J
        int[] builtInBucketSizeCounts = getFreshBucketSizeCount(itemIds, false); //Fresh calculation for built-in String hash
        int maxSize = Math.max(customBucketSizeCounts.length, builtInBucketSizeCounts.length);
        customBucketSizeCounts = Arrays.copyOf(customBucketSizeCounts, maxSize); //Pad with zeros to match size
        builtInBucketSizeCounts = Arrays.copyOf(builtInBucketSizeCounts, maxSize); //Pad with zeros to match size
        for (int i = 0; i < maxSize; i++) {
            System.out.printf("%-" + col1.length() + "d %s %-" + col2.length() + "d %s %-" + col3.length() + "d%n",
                    i, "|", customBucketSizeCounts[i], "|", builtInBucketSizeCounts[i]);
        }
    }

    /**
     * Creates a deep copy of the {@code ItemHashMap}.
     *
     * @return A new {@code ItemHashMap} containing the same entries as the original.
     * @throws CloneNotSupportedException if any of the keys or values in the map do not support cloning.
     */
    public ItemHashMap copy() throws CloneNotSupportedException {
        ItemHashMap copy = new ItemHashMap(size());
        for (Entry<IdHashKey, Item> entry : entrySet()) {
            copy.put((IdHashKey) entry.getKey().clone(), (Item) entry.getValue().clone());
        }
        return copy;
    }

    /**
     * Retrieves the number of items in the repository.
     *
     * @return The number of items in the repository.
     */
    @Override
    public int getSize() {
        return size();
    }
}