This document describes how to remove unused PdxTypes from a VMware GemFire cluster. When unstructured data keeps creating a new PdxType for every unique structure, data imports become slow and the logs show entries like those discussed in the knowledge base article:
VMware GemFire does not remove unused PDX types by default. However, we can write a function to remove any unused PDX types. The function is below:
import org.apache.geode.cache.Cache; import org.apache.geode.cache.CacheFactory; import org.apache.geode.cache.Declarable; import org.apache.geode.cache.Region; import org.apache.geode.cache.execute.Function; import org.apache.geode.cache.execute.FunctionContext; import org.apache.geode.cache.execute.RegionFunctionContext; import org.apache.geode.pdx.internal.PdxInstanceImpl; import org.apache.geode.pdx.internal.PdxField; import org.apache.geode.pdx.internal.PdxType; import org.apache.geode.pdx.internal.PeerTypeRegistration; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; public class RemoveUnusedPdxTypesFunction implements Function, Declarable { private final Cache cache; public RemoveUnusedPdxTypesFunction() { this.cache = CacheFactory.getAnyInstance(); } public void execute(FunctionContext context) { Object[] arguments = (Object[]) context.getArguments(); String regionName = (String) arguments[0]; boolean simulate = (Boolean) arguments[1]; // Get all PdxTypes Map<Integer,PdxType> allPdxTypesCopy = getAllPdxTypes(); dumpPdxTypes(allPdxTypesCopy, "existing"); // Iterate region values as PdxInstances and remove in use PdxTypes from the map Region region = this.cache.getRegion(regionName); for (Object value : region.values()) { PdxInstanceImpl pdxInstance = (PdxInstanceImpl) value; removeInUsePdxTypes(allPdxTypesCopy, pdxInstance); } // Remove in use PdxTypes dumpPdxTypes(allPdxTypesCopy, "unused"); if (!simulate) { this.cache.getRegion(PeerTypeRegistration.REGION_NAME).removeAll(allPdxTypesCopy.keySet()); dumpPdxTypes(getAllPdxTypes(), "used"); } context.getResultSender().lastResult(true); } private void removeInUsePdxTypes(Map<Integer,PdxType> allPdxTypesCopy, Object obj) { if (obj instanceof PdxInstanceImpl) { PdxInstanceImpl pdxInstance = (PdxInstanceImpl) obj; PdxType pdxType = pdxInstance.getPdxType(); allPdxTypesCopy.remove(pdxType.getTypeId()); for 
(PdxField field : pdxType.getFields()) { Object fieldValue = pdxInstance.readField(field.getFieldName()); removeInUsePdxTypes(allPdxTypesCopy, fieldValue); } } else if (obj instanceof Collection) { List list = (List) obj; for (Iterator i = list.iterator(); i.hasNext();) { removeInUsePdxTypes(allPdxTypesCopy, i.next()); } } } private Map getAllPdxTypes() { return this.cache.getRegion(PeerTypeRegistration.REGION_NAME).entrySet() .stream() .filter(entry -> entry.getValue() instanceof PdxType) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } private void dumpPdxTypes(Map<Integer,PdxType> pdxTypes, String message) { StringBuilder builder = new StringBuilder(); builder.append("There are ").append(pdxTypes.size()).append(" ").append(message).append(" PdxTypes:"); for (PdxType pdxType : pdxTypes.values()) { Set<String> fieldNames = pdxType.getFields() .stream() .map(field -> field.getFieldName()) .collect(Collectors.toSet()); builder.append("\n\t").append("id=").append(pdxType.getTypeId()).append("; className=").append(pdxType.getClassName()).append("; fields=").append(fieldNames); } this.cache.getLogger().info(builder.toString()); } public String getId() { return getClass().getSimpleName(); } }
The RemoveUnusedPdxTypesFunction does the following:
The PdxTypes are stored in a replicated Region called PdxTypes. EnumInfo instances (which represent Enums in PDX) are also stored in that Region. This method filters those out and returns just the PdxTypes.
/**
 * Returns a new map of type id to PdxType built from the "PdxTypes" region. Entries whose
 * value is not a PdxType (such as EnumInfo instances) are filtered out.
 */
private Map<Integer, PdxType> getAllPdxTypes() {
  return this.cache.getRegion("PdxTypes").entrySet()
      .stream()
      .filter(entry -> entry.getValue() instanceof PdxType)
      // Explicit casts give the result a real type instead of a raw Map.
      .collect(Collectors.toMap(
          entry -> (Integer) entry.getKey(),
          entry -> (PdxType) entry.getValue()));
}
This method recursively removes every PdxType that is in use by the input object from the map of candidate PdxTypes. If the object is a PdxInstanceImpl, the method gets its PdxType and removes it from the map, then iterates each field of the PdxInstanceImpl and recursively calls itself on that field’s value. Collections and Maps are iterated separately, with the method called on each element (for Maps, on each key and each value), so every nested PdxInstanceImpl eventually ends up in the first conditional.
/**
 * Recursively removes from {@code allPdxTypesCopy} every PdxType referenced by {@code obj}.
 *
 * <p>A PdxInstanceImpl contributes its own type and then each of its field values is visited.
 * Collections, maps (keys and values) and object arrays are visited element by element. Any
 * other object, including {@code null}, is ignored.
 *
 * @param allPdxTypesCopy candidate map keyed by type id; in-use entries are removed from it
 * @param parent the object that contains {@code obj}; passed along the recursion, not read here
 * @param objFieldName name of the field {@code obj} was read from; passed along, not read here
 * @param obj the value to inspect
 */
private void removeInUsePdxTypes(Map<Integer,PdxType> allPdxTypesCopy, Object parent, String objFieldName, Object obj) {
  if (obj instanceof PdxInstanceImpl) {
    PdxInstanceImpl pdxInstance = (PdxInstanceImpl) obj;
    PdxType pdxType = pdxInstance.getPdxType();
    allPdxTypesCopy.remove(pdxType.getTypeId());
    for (PdxField field : pdxType.getFields()) {
      String fieldName = field.getFieldName();
      Object fieldValue = pdxInstance.readField(fieldName);
      removeInUsePdxTypes(allPdxTypesCopy, obj, fieldName, fieldValue);
    }
  } else if (obj instanceof Collection) {
    // Cast to Collection, not List: the value may be a Set or any other collection type.
    for (Object value : (Collection<?>) obj) {
      removeInUsePdxTypes(allPdxTypesCopy, obj, objFieldName, value);
    }
  } else if (obj instanceof Map) {
    for (Map.Entry<?, ?> entry : ((Map<?, ?>) obj).entrySet()) {
      removeInUsePdxTypes(allPdxTypesCopy, obj, objFieldName, entry.getKey());
      removeInUsePdxTypes(allPdxTypesCopy, obj, objFieldName, entry.getValue());
    }
  } else if (obj instanceof Object[]) {
    // Object arrays can hold PDX instances too; skipping them would mark their types unused.
    for (Object value : (Object[]) obj) {
      removeInUsePdxTypes(allPdxTypesCopy, obj, objFieldName, value);
    }
  }
}
Delete Remaining PdxTypes
this.cache.getRegion("PdxTypes").removeAll(allPdxTypesCopy.keySet());