-
Notifications
You must be signed in to change notification settings - Fork 4
/
dtc.go
133 lines (112 loc) · 3.6 KB
/
dtc.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
package phash
// port from https://ironchef-team21.googlecode.com/git-history/75856e07bb89645d0e56820d6e79f8219a06bfb7/ironchef_team21/src/ImagePHash.java
import (
"image"
"math"
"sort"
"github.com/disintegration/imaging"
)
var (
dtcSizeBig = 32
dtcSize = 8
)
// DTC computes the perceptual hash for img using phash dtc image
// technique.
//
// 1. Reduce size to 32x32
// 2. Reduce color to greyscale
// 3. Compute the DCT.
// 4. Reduce the DCT to 8x8 in order to keep high frequencies.
// 5. Compute the median value of 8x8 dtc.
// 6. Further reduce the DCT into an uint64.
func DTC(img image.Image) (phash uint64) {
if img == nil {
return
}
size := dtcSizeBig
smallerSize := dtcSize
// 1. Reduce size.
// Like Average Hash, pHash starts with a small image. However,
// the image is larger than 8x8; 32x32 is a good size. This is
// really done to simplify the DCT computation and not because it
// is needed to reduce the high frequencies.
im := imaging.Resize(img, size, size, imaging.Lanczos)
// 2. Reduce color.
// The image is reduced to a grayscale just to further simplify
// the number of computations.
vals := make([]float64, size*size)
for i := 0; i < size; i++ {
for j := 0; j < size; j++ {
vals[size*i+j] = colorToGreyScaleFloat64(im.At(i, j))
}
}
// 3. Compute the DCT.
// The DCT separates the image into a collection of frequencies
// and scalars. While JPEG uses an 8x8 DCT, this algorithm uses a
// 32x32 DCT.
applyDCT2 := func(N int, f []float64) []float64 {
// initialize coefficients
c := make([]float64, N)
c[0] = 1 / math.Sqrt(2)
for i := 1; i < N; i++ {
c[i] = 1
}
// output goes here
F := make([]float64, N*N)
// construct a lookup table, because it's O(n^4)
entries := (2 * N) * (N - 1)
COS := make([]float64, entries)
for i := 0; i < entries; i++ {
COS[i] = math.Cos(float64(i) / float64(2*N) * math.Pi)
}
// the core loop inside a loop inside a loop...
for u := 0; u < N; u++ {
for v := 0; v < N; v++ {
var sum float64
for i := 0; i < N; i++ {
for j := 0; j < N; j++ {
sum += COS[(2*i+1)*u] *
COS[(2*j+1)*v] *
f[N*i+j]
}
}
sum *= ((c[u] * c[v]) / 4)
F[N*u+v] = sum
}
}
return F
}
dctVals := applyDCT2(size, vals)
// 4. Reduce the DCT.
// While the DCT is 32x32, just keep the top-left 8x8. Those
// represent the lowest frequencies in the picture.
vals = make([]float64, 0, smallerSize*smallerSize)
for x := 1; x <= smallerSize; x++ {
for y := 1; y <= smallerSize; y++ {
vals = append(vals, dctVals[size*x+y])
}
}
// 5. Compute the median value.
// Like the Average Hash, compute the mean DCT value (using only
// the 8x8 DCT low-frequency values and excluding the first term
// since the DC coefficient can be significantly different from
// the other values and will throw off the average).
sortedVals := make([]float64, smallerSize*smallerSize)
copy(sortedVals, vals)
sort.Float64s(sortedVals)
median := sortedVals[smallerSize*smallerSize/2]
// 6. Further reduce the DCT.
// Set the 64 hash bits to 0 or 1 depending on whether each of the
// 64 DCT values is above or below the average value. The result
// doesn't tell us the actual low frequencies; it just tells us
// the very-rough relative scale of the frequencies to the mean.
// The result will not vary as long as the overall structure of
// the image remains the same; this will survive gamma and color
// histogram adjustments without a problem.
for n, e := range vals {
if e > median { // when frequency is higher than median
phash ^= (1 << uint64(n)) // set nth bit to one
}
}
return phash
}