codebench.py
import os
import json
from functools import lru_cache
from glob import glob
from json import JSONDecodeError

import dateutil.parser
import pandas as pd


def parse_codemirror(path):
    """Parse a CodeMirror editor log into a list of event tuples."""
    components = os.path.normpath(path).split(os.sep)
    student = int(components[-3])
    assign_excercise = components[-1].split('.')[0]
    assignment = assign_excercise.split('_')[0]
    klass = components[-5]
    semester = components[-6]
    # Normalise the relative path so the lookup does not leave the log file
    # itself as an intermediate ".." component.
    _, _, deadline = parse_assessment_data(
        os.path.normpath(path + f"/../../../../assessments/{assignment}.data")
    )
    with open(path, encoding='utf-8') as f:
        log = f.readlines()
    event_type_map = {
        "focus": "focus_gained",
        "blur": "focus_lost",
        "paste": "text_paste"
    }
    events = []
    for line in log:
        if not line.strip():
            continue
        try:
            time, event_type, data = line.split("#", 2)
        except ValueError:
            # Some events skip the data section.
            try:
                time, event_type = line.split("#", 1)
                event_type = event_type.strip()  # drop the trailing newline
                data = ""
            except ValueError:
                # Some lines contain errors from CodeMirror itself instead of
                # log entries; ignore them and keep the events we can parse.
                continue
        try:
            t = pd.to_datetime(time)
        except dateutil.parser.ParserError:
            continue
        time = deadline - t
        if event_type == "change":
            try:
                data = json.loads(data.strip() or "{}")
            except JSONDecodeError:
                continue
            if "".join(data['text']).strip():
                events.append((semester, klass, student, assign_excercise, time, "text_insert"))
            if "".join(data['removed']).strip():
                events.append((semester, klass, student, assign_excercise, time, "text_remove"))
        elif event_type in event_type_map:
            events.append((semester, klass, student, assign_excercise, time, event_type_map[event_type]))
    return events
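

# Note: inferred from parse_codemirror above, not from a format specification.
# Each CodeMirror log line appears to be "<timestamp>#<event type>#<data>",
# where the data section is a JSON object with "text" and "removed" arrays for
# "change" events and is skipped entirely by some other events.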


def parse_run_log(path):
    """Parse an execution log into run/submit event tuples."""
    components = os.path.normpath(path).split(os.sep)
    student = int(components[-3])
    assign_excercise = components[-1].split('.')[0]
    assignment = assign_excercise.split('_')[0]
    klass = components[-5]
    semester = components[-6]
    with open(path, encoding='utf-8') as f:
        log = f.readlines()
    # Only lines starting with "==" record run/submit events; split each into
    # its kind and its delimited timestamp.
    filtered = [line[3:].strip().split(maxsplit=1) for line in log if line.startswith("==")]
    _, _, deadline = parse_assessment_data(
        os.path.normpath(path + f"/../../../../assessments/{assignment}.data")
    )
    event_type_map = {
        "TEST": "run",
        "SUBMITION": "submit"
    }
    return [(
        semester,
        klass,
        student,
        assign_excercise,
        deadline - pd.to_datetime(line[1][1:-1]),
        event_type_map[line[0]]
    ) for line in filtered]


def parse_final_grade(path):
    components = os.path.normpath(path).split(os.sep)
    student = int(components[-3])
    klass = components[-5]
    semester = components[-6]
    try:
        with open(path) as f:
            return (semester, klass, student, float(f.read().strip()))
    except FileNotFoundError:
        return (semester, klass, student, float('nan'))


@lru_cache
def parse_assessment_data(path):
    # Assessment metadata files contain "---- start: <datetime>" and
    # "---- end: <datetime>" marker lines; everything else is ignored here.
    components = os.path.normpath(path).split(os.sep)
    assessment = components[-1].split('.')[0]
    with open(path) as f:
        lines = f.readlines()
    start = pd.to_datetime(next(line for line in lines if line.startswith("---- start:"))[12:])
    end = pd.to_datetime(next(line for line in lines if line.startswith("---- end:"))[10:])
    return (assessment, start, end)


def first_assessment_for_class(klass):
    """Get the ID of the first assessment for the class; this is used for all predictions."""
    df = pd.DataFrame(
        [parse_assessment_data(path) for path in glob(rf"{klass}/assessments/*.data")],
        columns=["id", "start", "end"]
    ).sort_values('start')
    try:
        return df.iloc[0].id
    except IndexError:
        # The class has no assessment data files.
        return None


def assessments(klass):
    return glob(rf"{klass}/assessments/*.data")


def semesters(root):
    return [sem for sem in glob(rf'{root}/*') if os.path.isdir(sem)]


def classes(semester):
    return glob(rf'{semester}/*')


def users(klass):
    return glob(rf'{klass}/users/*')


def logs_for_user(user, assessment):
    logs = []
    # Some students might not have used codemirror! In this case we only have run/submit logs
    for excercise in glob(f'{user}/codemirror/{assessment}_*.log'):
        logs.extend(parse_codemirror(excercise))
    for excercise in glob(f'{user}/executions/{assessment}_*.log'):
        logs.extend(parse_run_log(excercise))
    return pd.DataFrame(
        logs,
        columns=["semester", "class", "student", "assignment_excercise", "time_to_deadline", "event_type"]
    )
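

# --- Usage sketch (illustrative) --------------------------------------------
# A minimal example of how the helpers above compose into one event DataFrame.
# The dataset root "./data" is a hypothetical path; it is assumed to follow the
# <root>/<semester>/<class>/users/<student>/... layout that the parsers expect.
if __name__ == "__main__":
    frames = []
    for semester in semesters("./data"):
        for klass in classes(semester):
            assessment = first_assessment_for_class(klass)
            if assessment is None:
                # The class has no assessment metadata, so there is nothing to parse.
                continue
            for user in users(klass):
                frames.append(logs_for_user(user, assessment))
    if frames:
        events = pd.concat(frames, ignore_index=True)
        print(events.groupby("event_type").size())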