|
16 | 16 | package com.dqops.data.readouts.snapshot;
|
17 | 17 |
|
18 | 18 | import com.dqops.data.readouts.factory.SensorReadoutsColumnNames;
|
| 19 | +import com.dqops.data.storage.LoadedMonthlyPartition; |
| 20 | +import com.dqops.data.storage.ParquetPartitionId; |
19 | 21 | import com.dqops.utils.tables.TableColumnUtility;
|
| 22 | +import lombok.Data; |
20 | 23 | import tech.tablesaw.api.LongColumn;
|
21 | 24 | import tech.tablesaw.api.Table;
|
22 | 25 | import tech.tablesaw.index.LongIndex;
|
23 | 26 | import tech.tablesaw.selection.Selection;
|
24 | 27 |
|
| 28 | +import java.lang.ref.WeakReference; |
25 | 29 | import java.time.LocalDate;
|
26 |
| -import java.util.HashMap; |
27 | 30 | import java.util.LinkedHashMap;
|
28 | 31 | import java.util.Map;
|
| 32 | +import java.util.TreeMap; |
29 | 33 |
|
30 | 34 | /**
|
31 | 35 | * Dictionary of identified time series in the historic sensor readout results.
|
32 | 36 | */
|
33 | 37 | public class SensorReadoutsTimeSeriesMap {
|
34 |
| - private final Map<SensorReadoutTimeSeriesKey, SensorReadoutsTimeSeriesData> entries = new LinkedHashMap<>(); |
| 38 | + private final Map<SensorReadoutTimeSeriesKey, WeakReference<SensorReadoutsTimeSeriesData>> entries = new LinkedHashMap<>(); |
| 39 | + private final Map<ParquetPartitionId, LoadedMonthlyPartition> partitionMap; |
| 40 | + private final Map<ParquetPartitionId, PartitionIndexes> partitionIndexes = new TreeMap<>(); |
35 | 41 | private LocalDate firstLoadedMonth;
|
36 | 42 | private LocalDate lastLoadedMonth;
|
37 |
| - private Table allLoadedData; |
38 |
| - private LongColumn checkHashColumn; |
39 |
| - private LongColumn dataStreamHashColumn; |
40 |
| - private LongIndex checkHashIndex; |
41 |
| - private LongIndex dataStreamHashIndex; |
42 | 43 |
|
43 | 44 | /**
|
44 | 45 | * Create a time series map.
|
45 | 46 | * @param firstLoadedMonth The date of the first loaded month.
|
46 | 47 | * @param lastLoadedMonth The date of the last loaded month.
|
| 48 | + * @param partitionMap Dictionary of loaded partitions. |
47 | 49 | */
|
48 |
| - public SensorReadoutsTimeSeriesMap(LocalDate firstLoadedMonth, LocalDate lastLoadedMonth, Table allLoadedData) { |
| 50 | + public SensorReadoutsTimeSeriesMap(LocalDate firstLoadedMonth, LocalDate lastLoadedMonth, |
| 51 | + Map<ParquetPartitionId, LoadedMonthlyPartition> partitionMap) { |
49 | 52 | this.firstLoadedMonth = firstLoadedMonth;
|
50 | 53 | this.lastLoadedMonth = lastLoadedMonth;
|
51 |
| - this.allLoadedData = allLoadedData; |
52 |
| - |
53 |
| - if (allLoadedData != null) { |
54 |
| - this.checkHashColumn = (LongColumn) allLoadedData.column(SensorReadoutsColumnNames.CHECK_HASH_COLUMN_NAME); |
55 |
| - this.dataStreamHashColumn = (LongColumn) TableColumnUtility.findColumn(allLoadedData, |
56 |
| - SensorReadoutsColumnNames.DATA_GROUP_HASH_COLUMN_NAME); |
57 |
| - this.checkHashIndex = new LongIndex(this.checkHashColumn); |
58 |
| - this.dataStreamHashIndex = new LongIndex(this.dataStreamHashColumn); |
| 54 | + this.partitionMap = partitionMap; |
| 55 | + if (partitionMap != null) { |
| 56 | + for (Map.Entry<ParquetPartitionId, LoadedMonthlyPartition> partitionKeyValue : partitionMap.entrySet()) { |
| 57 | + Table partitionData = partitionKeyValue.getValue().getData(); |
| 58 | + if (partitionData == null) { |
| 59 | + return; |
| 60 | + } |
| 61 | + |
| 62 | + LongColumn checkHashColumn = (LongColumn) partitionData.column(SensorReadoutsColumnNames.CHECK_HASH_COLUMN_NAME); |
| 63 | + LongColumn dataStreamHashColumn = (LongColumn) TableColumnUtility.findColumn(partitionData, |
| 64 | + SensorReadoutsColumnNames.DATA_GROUP_HASH_COLUMN_NAME); |
| 65 | + LongIndex checkHashIndex = new LongIndex(checkHashColumn); |
| 66 | + LongIndex dataStreamHashIndex = new LongIndex(dataStreamHashColumn); |
| 67 | + |
| 68 | + PartitionIndexes partitionIndexesEntry = new PartitionIndexes(checkHashIndex, dataStreamHashIndex, partitionKeyValue.getValue()); |
| 69 | + this.partitionIndexes.put(partitionKeyValue.getKey(), partitionIndexesEntry); |
| 70 | + } |
59 | 71 | }
|
60 | 72 | }
|
61 | 73 |
|
@@ -83,30 +95,66 @@ public LocalDate getLastLoadedMonth() {
|
83 | 95 | */
|
84 | 96 | public SensorReadoutsTimeSeriesData findTimeSeriesData(long checkHashId, long dimensionId) {
|
85 | 97 | SensorReadoutTimeSeriesKey key = new SensorReadoutTimeSeriesKey(checkHashId, dimensionId);
|
86 |
| - SensorReadoutsTimeSeriesData sensorReadoutsTimeSeriesData = this.entries.get(key); |
| 98 | + WeakReference<SensorReadoutsTimeSeriesData> sensorReadoutsTimeSeriesDataRef = this.entries.get(key); |
| 99 | + SensorReadoutsTimeSeriesData sensorReadoutsTimeSeriesData = sensorReadoutsTimeSeriesDataRef != null ? |
| 100 | + sensorReadoutsTimeSeriesDataRef.get() : null; |
| 101 | + |
87 | 102 | if (sensorReadoutsTimeSeriesData != null) {
|
88 | 103 | return sensorReadoutsTimeSeriesData;
|
89 | 104 | }
|
90 | 105 |
|
91 |
| - if (this.checkHashIndex == null) { |
92 |
| - return null; |
93 |
| - } |
| 106 | + Table allTimeSeriesData = null; |
| 107 | + |
| 108 | + for (Map.Entry<ParquetPartitionId, PartitionIndexes> partitionIndexesKeyValue : this.partitionIndexes.entrySet()) { |
| 109 | + PartitionIndexes partitionIndexesEntry = partitionIndexesKeyValue.getValue(); |
| 110 | + Selection checkHashRows = partitionIndexesEntry.checkHashIndex.get(checkHashId); |
| 111 | + Selection groupHashRows = partitionIndexesEntry.dataStreamHashIndex.get(dimensionId); |
| 112 | + |
| 113 | + Table partitionDataTable = partitionIndexesEntry.partitionData.getData(); |
| 114 | + if (partitionDataTable == null) { |
| 115 | + continue; |
| 116 | + } |
94 | 117 |
|
95 |
| - Selection checkHashRows = this.checkHashIndex.get(checkHashId); |
96 |
| - Selection groupHashRows = this.dataStreamHashIndex.get(dimensionId); |
| 118 | + Table filteredPartitionRows = partitionDataTable.where(checkHashRows.and(groupHashRows)); |
| 119 | + Table sortedTimeSeriesTable = filteredPartitionRows.sortOn(SensorReadoutsColumnNames.TIME_PERIOD_COLUMN_NAME); |
97 | 120 |
|
98 |
| - Table filteredRows = this.allLoadedData.where(checkHashRows.and(groupHashRows)); |
99 |
| - Table sortedTimeSeriesTable = filteredRows.sortOn(SensorReadoutsColumnNames.TIME_PERIOD_COLUMN_NAME); |
| 121 | + if (allTimeSeriesData == null) { |
| 122 | + allTimeSeriesData = sortedTimeSeriesTable; |
| 123 | + } else { |
| 124 | + allTimeSeriesData.append(sortedTimeSeriesTable); |
| 125 | + } |
| 126 | + } |
| 127 | + |
| 128 | + SensorReadoutsTimeSeriesData timeSeriesDataSlice = new SensorReadoutsTimeSeriesData(key, allTimeSeriesData); |
100 | 129 |
|
101 |
| - SensorReadoutsTimeSeriesData newSubset = new SensorReadoutsTimeSeriesData(key, sortedTimeSeriesTable); |
102 |
| - return newSubset; |
| 130 | + // TODO: we could store it in the cache.. but not for the moment, maybe for a different use case |
| 131 | + return timeSeriesDataSlice; |
103 | 132 | }
|
104 | 133 |
|
105 | 134 | /**
|
106 |
| - * Adds a time series object to the dictionary. |
107 |
| - * @param timeSeries Time series object. |
| 135 | + * Partition indexes container. |
108 | 136 | */
|
109 |
| - public void add(SensorReadoutsTimeSeriesData timeSeries) { |
110 |
| - this.entries.put(timeSeries.getKey(), timeSeries); |
| 137 | + @Data |
| 138 | + public static class PartitionIndexes { |
| 139 | + /** |
| 140 | + * Check hash index. |
| 141 | + */ |
| 142 | + private final LongIndex checkHashIndex; |
| 143 | + |
| 144 | + /** |
| 145 | + * Data stream (data group) hash index. |
| 146 | + */ |
| 147 | + private final LongIndex dataStreamHashIndex; |
| 148 | + |
| 149 | + /** |
| 150 | + * The partition data. |
| 151 | + */ |
| 152 | + private final LoadedMonthlyPartition partitionData; |
| 153 | + |
| 154 | + public PartitionIndexes(LongIndex checkHashIndex, LongIndex dataStreamHashIndex, LoadedMonthlyPartition monthlyPartition) { |
| 155 | + this.checkHashIndex = checkHashIndex; |
| 156 | + this.dataStreamHashIndex = dataStreamHashIndex; |
| 157 | + this.partitionData = monthlyPartition; |
| 158 | + } |
111 | 159 | }
|
112 | 160 | }
|
0 commit comments