-
Notifications
You must be signed in to change notification settings - Fork 16
/
georaptor.py
128 lines (97 loc) · 4.32 KB
/
georaptor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/python3
import time
import argparse
from clint.textui import puts, indent, colored
def read_file(filename):
    """Load the geohashes stored in a text file, one geohash per line.

    Leading and trailing whitespace is stripped from every line and lines
    that are empty after stripping are skipped, so blank lines or stray
    spaces never end up in the result as bogus geohashes.

    Args:
        filename: Path of the file to read.

    Returns:
        A set with the distinct geohash strings found in the file
        (an empty set for an empty file).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filename) as f:
        # Iterate line by line instead of loading the whole file with read()
        stripped_lines = (line.strip() for line in f)
        return {geohash for geohash in stripped_lines if geohash}
# Combination generator for a given geohash at the next level
def get_combinations(string):
    """Return the 32 geohashes one level below *string*.

    Each result is *string* followed by one symbol of the geohash base32
    alphabet (digits plus lowercase letters, without a, i, l and o), in
    alphabet order.

    Args:
        string: Geohash prefix to extend; may be empty.

    Returns:
        A list of 32 strings, each one character longer than *string*.
    """
    base32 = '0123456789bcdefghjkmnpqrstuvwxyz'
    return [string + symbol for symbol in base32]
# Iterative optimization of the geohash set
def compress(geohashes, minlevel, maxlevel):
    """Merge complete groups of sibling geohashes into their parent geohash.

    Whenever all 32 next-level children of a prefix are present, they are
    replaced by that prefix. Passes are repeated until a pass changes
    nothing, so merges cascade upwards through the levels.

    Args:
        geohashes: Collection of geohash strings. It is not modified; empty
            strings are ignored.
        minlevel: Only geohashes longer than this are merged, so merging
            never produces a geohash shorter than ``minlevel``. Input
            geohashes that are already at or below this length are kept
            unchanged.
        maxlevel: Geohashes that cannot be merged are truncated to at most
            this many characters. Expected to be >= ``minlevel``.

    Returns:
        A new set with the compressed geohashes, or ``False`` (after
        printing a message) when ``geohashes`` is empty.
    """
    # Nothing to compress
    if not geohashes:
        puts(colored.red('No geohashes found!\n'))
        return False

    # Geohash base32 alphabet: every prefix has exactly these 32 children
    base32 = '0123456789bcdefghjkmnpqrstuvwxyz'

    # Work on a copy so the caller's collection is left untouched
    current = {geohash for geohash in geohashes if geohash}

    while True:
        compressed = set()
        # prefix -> True when all 32 of its children are in `current`.
        # Caching the verdict means each sibling group is checked only once
        # per pass and the outcome does not depend on iteration order.
        complete = {}

        for geohash in current:
            if len(geohash) > minlevel:
                parent = geohash[:-1]
                if parent not in complete:
                    # all() stops at the first missing child
                    complete[parent] = all(parent + symbol in current
                                           for symbol in base32)
                if complete[parent]:
                    compressed.add(parent)
                    continue

            # Not mergeable: keep it, forcing it down to maxlevel if longer
            if len(geohash) > maxlevel:
                compressed.add(geohash[:maxlevel])
            else:
                compressed.add(geohash)

        # Stop once a full pass leaves the set unchanged
        if compressed == current:
            return compressed
        current = compressed
def main():
    """Command-line entry point.

    Parses the arguments, reads the geohashes from the input file, runs
    compress() on them and, unless compress() reported empty input by
    returning False, appends the result to the output file one geohash per
    line. Progress messages and the total execution time are printed.
    """
    start_time = time.time()

    # Fetch input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='input filename containing list of geohashes')
    parser.add_argument('--output', default='output.csv',
                        help='output filename containing a optimized list of geohashes (default: output.csv)')
    parser.add_argument('--minlevel', type=int, default=1,
                        help='minimum level of geohash (default: 1)')
    parser.add_argument('--maxlevel', type=int, default=12,
                        help='maximum level of geohash (default: 12)')
    args = parser.parse_args()
    output_file = args.output

    puts(colored.green('\nReading the file'))
    geohashes = read_file(args.input)
    puts(colored.green('{:,}'.format(len(geohashes)) + ' geohashes read\n'))
    time.sleep(1)

    puts(colored.red('Starting compression\n'))
    georaptor_out = compress(geohashes, args.minlevel, args.maxlevel)

    # compress() returns False for empty input; check that before calling
    # len() on the result, which would raise TypeError on a bool.
    if georaptor_out is not False:
        puts(colored.red('Compressed to: ' + '{:,}'.format(len(georaptor_out))))

        # Output to file
        # NOTE(review): append mode means a re-run with the same output file
        # adds to the previous results instead of replacing them - confirm
        # this is intended.
        with open(output_file, 'a') as fp:
            fp.writelines(geohash + '\n' for geohash in georaptor_out)

        puts(colored.red('\nCompression complete!'))
        time.sleep(1)
        puts(colored.white('\nOutput available at ' + output_file + '\n'))

    et = time.time() - start_time
    puts(colored.green('Total execution time: ' + str(et) + ' seconds\n'))
# Run the command-line interface only when executed as a script, not on import
if __name__ == "__main__":
    main()