-
Notifications
You must be signed in to change notification settings - Fork 9
/
music_processor.py
executable file
·377 lines (299 loc) · 11.4 KB
/
music_processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
# Copyright 2017 Nanoleaf Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pyaudio
import librosa
import numpy as np
import argparse
import socket
import sys
import threading
from time import sleep, time
from distutils.version import StrictVersion
from builtins import input
# --- Shared state between the pyaudio callback thread, the keypress watcher
# and the main loop. All cross-thread access goes through the two locks. ---
pyaudio_lock = threading.Lock()   # guards data_buffer / data_buffer_updated / sample_rate
keypress_lock = threading.Lock()  # guards stop_loop
stop_pyaudio_thread = False       # set by the main thread to stop PyAudioThread.run()
data_buffer = []                  # latest raw audio block written by the pyaudio callback
data_buffer_updated = False       # True when data_buffer holds unread data
sample_rate = 0                   # set by PyAudioThread from the default input device
stop_loop = False                 # set by KeyPressThread when the user enters 'q'
class KeyPressThread (threading.Thread):
    """Watches stdin in the background and raises the shared stop flag.

    Prompts the user repeatedly; when 'q' is entered, sets the module-level
    stop_loop (under keypress_lock) so the main loop can shut down cleanly.
    """

    def __init__(self):
        super().__init__()

    def run(self):
        global stop_loop
        while True:
            choice = input("enter q to quit\n>>> ")
            if choice != "q":
                # Not a quit request; back off briefly before re-prompting.
                sleep(0.2)
                continue
            # Publish the stop request to the main loop.
            with keypress_lock:
                stop_loop = True
            break
class PyAudioThread (threading.Thread):
    """Captures mono audio from the default input device via a PyAudio
    callback stream and publishes each raw block into the shared
    data_buffer (guarded by pyaudio_lock)."""
    def __init__(self, input_samples, input_format):
        # input_samples: frames delivered per callback buffer.
        # input_format: pyaudio sample format constant (e.g. pyaudio.paFloat32).
        threading.Thread.__init__(self)
        self.input_format = input_format
        self.input_samples = input_samples
    def run(self):
        # create pyaudio object
        pa = pyaudio.PyAudio()
        # get default host/input device info dicts
        try:
            host_api_info = pa.get_default_host_api_info()
            default_input_device_info = pa.get_default_input_device_info()
        except IOError:
            # NOTE(review): sys.exit() in a non-main thread raises SystemExit
            # and terminates only this thread; the main loop keeps running
            # without audio — confirm whether that is the intended behavior.
            print("Input audio device not found, terminating ...")
            pa.terminate()
            sys.exit()
        default_input_sample_rate = default_input_device_info['defaultSampleRate']       # float
        default_low_input_latency = default_input_device_info['defaultLowInputLatency']  # float
        default_high_input_latency = default_input_device_info['defaultHighInputLatency']# float
        default_max_input_channels = default_input_device_info['maxInputChannels']       # integer
        print("default inputs: sample rate {}, latency low {:.4f}, latency high {:.4f}, channels {}".\
            format(default_input_sample_rate, default_low_input_latency, default_high_input_latency, default_max_input_channels))
        # Publish the device sample rate so process_music_data can resample.
        global sample_rate
        pyaudio_lock.acquire()
        sample_rate = default_input_sample_rate
        pyaudio_lock.release()
        # Open a callback-driven (non-blocking) mono input stream; pyaudio
        # invokes input_callback from its own internal thread per buffer.
        stream = pa.open(rate=int(sample_rate),
                         channels=1,
                         format=self.input_format,
                         input=True,
                         frames_per_buffer=self.input_samples,
                         stream_callback=PyAudioThread.input_callback)
        stream.start_stream()
        # Idle until the main thread asks us to stop.
        global stop_pyaudio_thread
        while not stop_pyaudio_thread:
            sleep(0.1)
        stream.stop_stream()
        stream.close()
        # terminate pyaudio object
        pa.terminate()
    @staticmethod
    def input_callback(in_data, frame_count, time_info, status):
        # Runs on pyaudio's callback thread: hand the newest raw buffer to
        # the main loop. Older unread data is simply overwritten.
        global data_buffer, data_buffer_updated
        pyaudio_lock.acquire()
        data_buffer = in_data       # fill data
        data_buffer_updated = True  # set updated flag
        pyaudio_lock.release()
        # None audio out (input-only stream); paContinue keeps the stream live.
        return None, pyaudio.paContinue
def update_magnitude_scaling(mag, scalar, min_scalar):
    '''
    Track the peak frequency-bin magnitude with a first-order low-pass
    (2% step toward the current peak per call), floored at min_scalar.

    :param mag: numpy array of magnitudes of frequency bins
    :param scalar: previous scalar
    :param min_scalar: minimum scalar
    :return: updated scalar, never below min_scalar
    '''
    max_mag = np.max(mag)
    mag_diff = max_mag - scalar
    updated_scalar = scalar + 0.02 * mag_diff
    # BUG FIX: clamp the *updated* value. The original tested the previous
    # `scalar`, so the returned value could decay below min_scalar.
    if updated_scalar < min_scalar:
        updated_scalar = min_scalar
    # print("{} ({})".format(updated_scalar, max_mag))
    return updated_scalar
def visualizer(data_in):
    """Draw a crude one-line energy bar on stdout.

    The bar length is the signal energy (sum of squared samples) times a
    fixed gain, capped at 100 columns; '|' marks the level, spaces pad the
    rest, and the line starts with '\\r' so successive calls overwrite it.
    """
    gain = 5
    bar_width = 100
    level = int(min(np.sum(np.abs(data_in) ** 2) * gain, bar_width))
    # Carriage return, then the filled and empty portions of the bar.
    sys.stdout.write('\r')
    sys.stdout.write('|' * level)
    sys.stdout.write(' ' * (bar_width - level))
    sys.stdout.write('\n')
    sys.stdout.flush()
def check_min_versions():
    """Verify that pyaudio, librosa and numpy meet minimum versions.

    Prints an error line for every module below its minimum.

    FIX: replaces distutils.version.StrictVersion — distutils is deprecated
    since Python 3.10 and removed in 3.12 (PEP 632) — with a local numeric
    tuple comparison.

    :return: True if all minimums are satisfied, False otherwise.
    """
    def version_tuple(vers):
        # Parse "X.Y.Z"-style strings into comparable integer tuples.
        # Each dotted component is truncated at its first non-digit
        # character, so e.g. "1.0rc1" -> (1, 0).
        fields = []
        for token in vers.split("."):
            digits = ""
            for ch in token:
                if not ch.isdigit():
                    break
                digits += ch
            fields.append(int(digits) if digits else 0)
        return tuple(fields)

    def at_least(current, required):
        a = version_tuple(current)
        b = version_tuple(required)
        # Zero-pad so "1.9" compares equal to "1.9.0".
        width = max(len(a), len(b))
        a += (0,) * (width - len(a))
        b += (0,) * (width - len(b))
        return a >= b

    ret = True
    # pyaudio
    vers_required = "0.2.7"
    vers_current = pyaudio.__version__
    if not at_least(vers_current, vers_required):
        print("Error: minimum pyaudio vers: {}, current vers {}".format(vers_required, vers_current))
        ret = False
    # librosa
    vers_required = "0.4.3"
    vers_current = librosa.__version__
    if not at_least(vers_current, vers_required):
        print("Error: minimum librosa vers: {}, current vers {}".format(vers_required, vers_current))
        ret = False
    # numpy
    vers_required = "1.9.0"
    vers_current = np.__version__
    if not at_least(vers_current, vers_required):
        print("Error: minimum numpy vers: {}, current vers {}".format(vers_required, vers_current))
        ret = False
    return ret
def get_output_fft_bins(fft_mag, n_out):
    """Fold an FFT magnitude array into n_out output bins.

    Consecutive groups of len(fft_mag)//n_out input bins are summed into
    each output bin, and every bin is saturated at 255 so it fits an
    8-bit unsigned value downstream.
    """
    n_in = len(fft_mag)
    stride = int(n_in / n_out)
    binned = np.zeros(n_out)
    start = 0
    for out_idx in range(n_out):
        group_sum = np.sum(fft_mag[start:min(start + stride, n_in)])
        start += stride
        # saturate to 8-bit unsigned
        binned[out_idx] = group_sum if group_sum <= 255 else 255
    return binned
def process_music_data(data_in, is_fft, is_mel, n_out_bins, n_fft, n_mel, is_energy, is_visual):
    """Turn one raw audio buffer into the sound features the plugin asked for.

    :param data_in: raw bytes from the pyaudio callback (little-endian
                    float32 mono samples; length is len(data_in)/4 samples)
    :param is_fft: compute FFT bins (mutually exclusive with is_mel)
    :param is_mel: compute mel-spectrogram bins instead of raw FFT bins
    :param n_out_bins: number of output feature bins
    :param n_fft: FFT window/hop size used by the short-time transform
    :param n_mel: number of mel bands (when is_mel)
    :param is_energy: also compute total signal energy
    :param is_visual: additionally render the console energy bar
    :return: (fft_output as uint8 array of n_out_bins, energy_output as uint16)
    """
    # FIX: np.fromstring was deprecated and removed in NumPy 2.0;
    # frombuffer reads the same little-endian float32 samples (zero-copy,
    # read-only — nothing below mutates data_np in place).
    data_np = np.frombuffer(data_in, dtype=np.float32)
    # visualizer
    if is_visual:
        visualizer(data_np)
    # energy: sum of squared samples, scaled by 2**5 to use the uint16 range
    if is_energy:
        energy = np.abs(data_np) ** 2
        energy = energy.sum()
        energy *= 2**5
        energy_output = energy.astype(np.uint16)
    else:
        energy_output = np.zeros(2).astype(np.uint16)
    # fft or mel
    if is_fft or is_mel:
        global sample_rate
        # down-sample by 4, with filtering, energy not scaled.
        # Keyword arguments work on both old and >=0.10 librosa signatures.
        data_np = librosa.resample(data_np,
                                   orig_sr=sample_rate,
                                   target_sr=sample_rate / 4,
                                   res_type='kaiser_fast')
        # short time fft over n_fft samples (hop == n_fft: no overlap)
        fft_data = librosa.stft(data_np, n_fft=n_fft,
                                hop_length=n_fft,
                                center=False)
        # calculate FFT or Mel
        if is_fft:
            fft_data_mag = np.abs(fft_data[0:n_fft // 2]) ** 2
            fft_data_mag *= 2**3
            fft_output = get_output_fft_bins(fft_data_mag, n_out_bins)
        else:
            fft_data_mag = np.abs(fft_data)**2
            fft_data_mag *= 2**2
            mel_data = librosa.feature.melspectrogram(S=fft_data_mag, sr=sample_rate / 4, n_mels=n_mel)
            fft_output = get_output_fft_bins(mel_data, n_out_bins)
        # output uint8_t
        fft_output = fft_output.astype(np.uint8)
    else:
        fft_output = np.zeros(n_out_bins).astype(np.uint8)
    return fft_output, energy_output
if __name__ == '__main__':
    # parameters
    input_samples = 2**11            # frames per pyaudio callback buffer
    input_format = pyaudio.paFloat32
    min_delay = 50                   # minimum ms between outgoing UDP packets
    n_fft = 512
    n_mel = 26  # fixed, same as in Aurora
    udp_host = "127.0.0.1"
    udp_port = 27182                 # port the plugin/simulator receives features on
    sound_feature_udp_port = 27184   # port we listen on for the feature request
    # check minimum versions of imported modules
    if not check_min_versions():
        print("Minimum version not satisfied, please upgrade your modules as indicated!")
        exit(1)
    # parse command arguments
    parser = argparse.ArgumentParser(description="Music processing and streaming script for the Nanoleaf Rhythm SDK")
    parser.add_argument("--viz", help="turn on simple visualizer, please limit use to setup and debug", action="store_true")
    args = parser.parse_args()
    visualize = args.viz
    # open udp socket to receive the plugin's feature request
    udp_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    udp_socket.bind((udp_host, sound_feature_udp_port))
    # prompt user to start plugin
    print("Music processor initialized... please run your plugin to continue or ctrl+c to exit")
    # receive sound feature
    packet, addr = udp_socket.recvfrom(20)  # blocking, ctrl+c to exit
    from_host = addr[0]
    udp_socket.close()
    print("Plugin detected... continuing")
    # packet contains: [b i b] where b is boolean, i is integer
    # (four whitespace-separated fields are parsed below:
    #  is_fft, n_bins_out, is_energy, is_mel)
    # NOTE(review): if the packet comes from any host other than udp_host,
    # is_fft / n_bins_out / is_energy / is_mel are never assigned and the
    # loop below raises NameError — confirm whether that can happen.
    if from_host == udp_host:
        tokens = packet.split()
        is_fft = int(tokens[0])
        n_bins_out = int(tokens[1])
        is_energy = int(tokens[2])
        is_mel = int(tokens[3])  # either is_fft or is_mel, cannot be both
        if is_mel:
            # mel output overrides fft and fixes the bin count
            n_bins_out = n_mel
            is_fft = False
        # print("Sound features requested: fft {} mel {} bins out {} energy {}".format(is_fft, is_mel, n_bins_out, is_energy))
    # start pyaudio thread
    pa_thread = PyAudioThread(input_samples, input_format)
    pa_thread.start()
    # give the audio thread time to open the stream and publish sample_rate
    sleep(1)
    # open new udp socket to send
    udp_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    # main loop
    data = []
    data_updated = False
    stop = False
    print("Music processor active!")
    if visualize:
        print("Visualize on: try a loud clap and a simple sound bar should appear")
    else:
        print("If nothing seems to be happening, try running with --viz")
    # start key press thread
    kp_thread = KeyPressThread()
    kp_thread.start()
    # start timer
    startTime = time()
    # main processing loop
    while True:
        # snapshot the shared audio buffer under the lock, clearing the flag
        pyaudio_lock.acquire()
        data_updated = data_buffer_updated
        data = data_buffer
        data_buffer_updated = False
        pyaudio_lock.release()
        # process and send music data
        if data_updated:
            (fft, energy) = process_music_data(data,
                                               is_fft,
                                               is_mel,
                                               n_bins_out,
                                               n_fft,
                                               n_mel,
                                               is_energy,
                                               visualize)
            stopTime = time()
            elapsedTime = (stopTime - startTime) * 1000
            # throttle so packets go out at most once every min_delay ms
            sleepTime = min_delay - elapsedTime
            # print('buffer + process time {:.2f} ms, sleep for {:.2f} ms'.format(elapsedTime, sleepTime))
            if sleepTime > 0.0:
                sleep(sleepTime/1e3)
            # message to simulator: fft bins (uint8) followed by energy (uint16)
            message = fft.tobytes() + energy.tobytes()
            # print("fft {} energy {}".format(fft, energy))
            udp_socket.sendto(message, (udp_host, udp_port))
            startTime = time()
        # check for key press to quit loop
        keypress_lock.acquire()
        stop = stop_loop
        keypress_lock.release()
        if stop:
            print("Stopping music processor!")
            break
    # stop pyaudio thread
    stop_pyaudio_thread = True
    pa_thread.join()
    # stop keypress thread
    kp_thread.join()