Java 百万级数据对比工具

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/**
 * Compares several tagged collections of keys held in memory.
 *
 * <p>Every distinct tag is assigned one bit of an {@code int}; each key maps to the
 * bitwise OR of the bits of all tags it was added under. Set operations (unique,
 * intersection, union, difference) are then answered with a single pass over the
 * key map and a bit test per entry.
 *
 * <p>Because membership is stored in an {@code int}, at most {@value #MAX_TAGS}
 * distinct tags can be registered per instance. This class performs no
 * synchronization of its own.
 *
 * @param <K> key type; it is used as a {@link HashMap} key
 */
public class Compare<K> {

    /** Maximum number of distinct tags: one bit of an {@code int} per tag. */
    private static final int MAX_TAGS = Integer.SIZE;

    /** Key -> bitmask of the tags the key has been added under (never 0). */
    private final Map<K, Integer> data;

    /** Tag -> the single bit assigned to it. */
    private final Map<String, Integer> tagBits = new HashMap<>();

    /** Bit handed to the next new tag; becomes 0 once all bits have been assigned. */
    private int nextBit = 1;

    /** Creates a comparer whose key map is pre-sized with capacity {@code 1 << 20}. */
    public Compare() {
        data = new HashMap<>(1 << 20);
    }

    /**
     * Creates a comparer with a caller-chosen initial capacity for the key map.
     *
     * @param initialCapacity initial capacity passed to the backing {@link HashMap}
     * @throws IllegalArgumentException if {@code initialCapacity} is negative
     */
    public Compare(int initialCapacity) {
        data = new HashMap<>(initialCapacity);
    }

    /**
     * Returns the bit assigned to {@code tag}, allocating a new one on first use.
     * Only the {@code add} methods call this; queries use {@link #findBit(String)}
     * so that they never consume a bit.
     *
     * @throws IllegalStateException if {@code tag} is new and all bits are in use
     */
    private int getBit(String tag) {
        Integer known = tagBits.get(tag);
        if (known != null) {
            return known;
        }
        // After the 32nd allocation the shift below has pushed the bit out of the
        // int, leaving 0. Handing out 0 would make the tag invisible to every query.
        if (nextBit == 0) {
            throw new IllegalStateException(
                    "Cannot register tag '" + tag + "': at most " + MAX_TAGS + " tags are supported");
        }
        int bit = nextBit;
        nextBit <<= 1;
        tagBits.put(tag, bit);
        return bit;
    }

    /**
     * Read-only lookup of a tag's bit.
     *
     * @return the tag's bit, or 0 if the tag has never been registered
     */
    private int findBit(String tag) {
        return tagBits.getOrDefault(tag, 0);
    }

    /**
     * Adds every element of {@code collection} under {@code tag}. May be called
     * repeatedly for the same tag. A {@code null} or empty collection is ignored
     * and does not register the tag.
     *
     * @param tag        name of the logical set
     * @param collection keys to add; may be {@code null}
     * @throws IllegalStateException if {@code tag} is new and the tag limit is reached
     */
    public void add(String tag, Collection<? extends K> collection) {
        if (collection == null || collection.isEmpty()) {
            return;
        }
        int bit = getBit(tag);
        for (K element : collection) {
            put(element, bit);
        }
    }

    /**
     * Adds a single key under {@code tag}.
     *
     * @param tag name of the logical set
     * @param key key to add
     * @throws IllegalStateException if {@code tag} is new and the tag limit is reached
     */
    public void add(String tag, K key) {
        put(key, getBit(tag));
    }

    /** ORs {@code bit} into the mask stored for {@code key}, creating the entry if absent. */
    private void put(K key, int bit) {
        data.merge(key, bit, (current, added) -> current | added);
    }

    /**
     * Returns the keys that belong to {@code tag} and to no other tag.
     *
     * @param tag name of the logical set
     * @return unmodifiable list of matching keys; empty if the tag is unknown
     */
    public List<K> unique(String tag) {
        int bit = findBit(tag);
        // Stored masks are never 0, so an unknown tag (bit 0) matches nothing.
        return keysWhere(mask -> mask == bit);
    }

    /**
     * Returns the keys present in every one of {@code tags}.
     *
     * <p>If any tag is unknown the result is empty. With no tags at all, every
     * stored key is returned.
     *
     * @param tags names of the logical sets to intersect
     * @return unmodifiable list of matching keys
     */
    public List<K> intersection(String... tags) {
        int wanted = 0;
        for (String tag : tags) {
            int bit = findBit(tag);
            if (bit == 0) {
                // An unregistered tag is an empty set; intersecting with it is empty.
                return List.of();
            }
            wanted |= bit;
        }
        int mask = wanted;
        return keysWhere(value -> (value & mask) == mask);
    }

    /**
     * Returns the keys present in at least one of {@code tags}. Unknown tags
     * contribute nothing.
     *
     * @param tags names of the logical sets to unite
     * @return unmodifiable list of matching keys
     */
    public List<K> union(String... tags) {
        int mask = buildMask(tags);
        return keysWhere(value -> (value & mask) != 0);
    }

    /**
     * Returns the keys that are in set {@code a} but not in set {@code b} (A - B).
     *
     * @param a tag of the set to take keys from
     * @param b tag of the set whose keys are excluded
     * @return unmodifiable list of matching keys; empty if {@code a} is unknown
     */
    public List<K> difference(String a, String b) {
        int bitA = findBit(a);
        int bitB = findBit(b);
        return keysWhere(value -> (value & bitA) != 0 && (value & bitB) == 0);
    }

    /** ORs together the bits of all known tags in {@code tags}; unknown tags add 0. */
    private int buildMask(String... tags) {
        int mask = 0;
        for (String tag : tags) {
            mask |= findBit(tag);
        }
        return mask;
    }

    /** Collects, in map iteration order, every key whose stored mask satisfies {@code test}. */
    private List<K> keysWhere(java.util.function.IntPredicate test) {
        return data.entrySet()
                .stream()
                .filter(entry -> test.test(entry.getValue()))
                .map(Map.Entry::getKey)
                .toList();
    }
}

如果本文对您有所帮助，欢迎打赏支持作者！

Licensed under CC BY-NC-SA 4.0
最后更新于 2026-03-12 15:59
使用 Hugo 构建
主题 Stack 由 Jimmy 设计