-
Notifications
You must be signed in to change notification settings - Fork 106
Expand file tree
/
Copy pathArithmeticCoderBase.java
More file actions
172 lines (138 loc) · 6.38 KB
/
ArithmeticCoderBase.java
File metadata and controls
172 lines (138 loc) · 6.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
/*
* Reference arithmetic coding
*
* Copyright (c) Project Nayuki
* MIT License. See readme file.
* https://www.nayuki.io/page/reference-arithmetic-coding
*/
import java.io.IOException;
/**
* Provides the state and behaviors that arithmetic coding encoders and decoders share.
* @see ArithmeticEncoder
* @see ArithmeticDecoder
*/
public abstract class ArithmeticCoderBase {
/*---- Configuration fields ----*/
/**
* Number of bits for the 'low' and 'high' state variables. Must be in the range [1, 62].
* <ul>
* <li>For state sizes less than the midpoint of around 32, larger values are generally better -
* they allow a larger maximum frequency total (maximumTotal), and they reduce the approximation
* error inherent in adapting fractions to integers; both effects reduce the data encoding loss
* and asymptotically approach the efficiency of arithmetic coding using exact fractions.</li>
* <li>But for state sizes greater than the midpoint, because intermediate computations are limited
* to the long integer type's 63-bit unsigned precision, larger state sizes will decrease the
* maximum frequency total, which might constrain the user-supplied probability model.</li>
* <li>Therefore numStateBits=32 is recommended as the most versatile setting
* because it maximizes maximumTotal (which ends up being slightly over 2^30).</li>
* <li>Note that numStateBits=62 is legal but useless because it implies maximumTotal=1,
* which means a frequency table can only support one symbol with non-zero frequency.</li>
* </ul>
*/
protected final int numStateBits;
/** Maximum range (high+1-low) during coding (trivial), which is 2^numStateBits = 1000...000. */
protected final long fullRange;
/** The top bit at width numStateBits, which is 0100...000. */
protected final long halfRange;
/** The second highest bit at width numStateBits, which is 0010...000. This is zero when numStateBits=1. */
protected final long quarterRange;
/** Minimum range (high+1-low) during coding (non-trivial), which is 0010...010. */
protected final long minimumRange;
/** Maximum allowed total from a frequency table at all times during coding. */
protected final long maximumTotal;
/** Bit mask of numStateBits ones, which is 0111...111. */
protected final long stateMask;
/*---- State fields ----*/
/**
* Low end of this arithmetic coder's current range. Conceptually has an infinite number of trailing 0s.
*/
protected long low;
/**
* High end of this arithmetic coder's current range. Conceptually has an infinite number of trailing 1s.
*/
protected long high;
/*---- Constructor ----*/
/**
* Constructs an arithmetic coder, which initializes the code range.
* @param numBits the number of bits for the arithmetic coding range
* @throws IllegalArgumentException if stateSize is outside the range [1, 62]
*/
public ArithmeticCoderBase(int numBits) {
if (!(1 <= numBits && numBits <= 62))
throw new IllegalArgumentException("State size out of range");
numStateBits = numBits;
fullRange = 1L << numStateBits;
halfRange = fullRange >>> 1; // Non-zero
quarterRange = halfRange >>> 1; // Can be zero
minimumRange = quarterRange + 2; // At least 2
maximumTotal = Math.min(Long.MAX_VALUE / fullRange, minimumRange);
stateMask = fullRange - 1;
low = 0;
high = stateMask;
}
/*---- Methods ----*/
/**
* Updates the code range (low and high) of this arithmetic coder as a result
* of processing the specified symbol with the specified frequency table.
* <p>Invariants that are true before and after encoding/decoding each symbol
* (letting fullRange = 2<sup>numStateBits</sup>):</p>
* <ul>
* <li>0 ≤ low ≤ code ≤ high < fullRange. ('code' exists only in the decoder.)
* Therefore these variables are unsigned integers of numStateBits bits.</li>
* <li>low < 1/2 × fullRange ≤ high.
* In other words, they are in different halves of the full range.</li>
* <li>(low < 1/4 × fullRange) || (high ≥ 3/4 × fullRange).
* In other words, they are not both in the middle two quarters.</li>
* <li>Let range = high − low + 1, then fullRange/4 < minimumRange ≤ range ≤
* fullRange. These invariants for 'range' essentially dictate the maximum total that the
* incoming frequency table can have, such that intermediate calculations don't overflow.</li>
* </ul>
* @param freqs the frequency table to use
* @param symbol the symbol that was processed
* @throws IllegalArgumentException if the symbol has zero frequency or the frequency table's total is too large
*/
protected void update(CheckedFrequencyTable freqs, int symbol) throws IOException {
// State check
if (low >= high || (low & stateMask) != low || (high & stateMask) != high)
throw new AssertionError("Low or high out of range");
long range = high - low + 1;
if (!(minimumRange <= range && range <= fullRange))
throw new AssertionError("Range out of range");
// Frequency table values check
long total = freqs.getTotal();
long symLow = freqs.getLow(symbol);
long symHigh = freqs.getHigh(symbol);
if (symLow == symHigh)
throw new IllegalArgumentException("Symbol has zero frequency");
if (total > maximumTotal)
throw new IllegalArgumentException("Cannot code symbol because total is too large");
// Update range
long newLow = low + symLow * range / total;
long newHigh = low + symHigh * range / total - 1;
low = newLow;
high = newHigh;
// While low and high have the same top bit value, shift them out
while (((low ^ high) & halfRange) == 0) {
shift();
low = ((low << 1) & stateMask);
high = ((high << 1) & stateMask) | 1;
}
// Now low's top bit must be 0 and high's top bit must be 1
// While low's top two bits are 01 and high's are 10, delete the second highest bit of both
while ((low & ~high & quarterRange) != 0) {
underflow();
low = (low << 1) ^ halfRange;
high = ((high ^ halfRange) << 1) | halfRange | 1;
}
}
/**
* Called to handle the situation when the top bit of {@code low} and {@code high} are equal.
* @throws IOException if an I/O exception occurred
*/
protected abstract void shift() throws IOException;
/**
* Called to handle the situation when low=01(...) and high=10(...).
* @throws IOException if an I/O exception occurred
*/
protected abstract void underflow() throws IOException;
}