# coding: utf-8
from pathlib import Path
from datetime import datetime
from dateutil import parser

import pandas as pd
import numpy as np
from mock import patch
9
import pytest
10

11
from tshistory.tsio import TimeSerie
12
13
14
15

DATADIR = Path(__file__).parent / 'data'


16
17
18
19
20
21
22
def assert_group_equals(g1, g2):
    """Assert that two groups hold the same series under the same names.

    Both arguments are mappings of series name -> ``pandas.Series``.

    Raises:
        AssertionError: if the two groups do not have exactly the same
            names, or if a series differs between them.
    """
    # Compare the names first: pairing the items with zip() would stop at
    # the shortest group and silently ignore missing or extra series.
    assert sorted(g1) == sorted(g2)
    for name, serie in g1.items():
        assert serie.equals(g2[name])


23
def test_changeset(engine):
    """Exercise inserts of several series made under a named changeset.

    Two series are inserted inside one changeset, one of them is updated in
    a second changeset, then both changesets are removed one after the other
    with ``delete_last_changeset_for``.
    """
    # instantiate one time serie handler object
    tso = TimeSerie()

    index = pd.date_range(start=datetime(2017, 1, 1), freq='D', periods=3)
    data = [1., 2., 3.]

    with engine.connect() as cnx:
        with tso.newchangeset(cnx, 'babar'):
            tso.insert(cnx, pd.Series(data, index=index), 'ts_values')
            tso.insert(cnx, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')

    # both series were written in the same changeset: asking for the group
    # through either name is expected to give the same content
    g = tso.get_group(engine, 'ts_values')
    g2 = tso.get_group(engine, 'ts_othervalues')
    assert_group_equals(g, g2)

    # an insert made outside of a changeset block, and without an author,
    # is expected to be rejected
    with pytest.raises(AssertionError):
        tso.insert(engine, pd.Series([2, 3, 4], index=index), 'ts_values')

    with engine.connect() as cnx:
        # rotate the values: [1., 2., 3.] becomes [2., 3., 1.]
        data.append(data.pop(0))
        with tso.newchangeset(cnx, 'celeste'):
            tso.insert(cnx, pd.Series(data, index=index), 'ts_values')
            # below should be a noop
            tso.insert(cnx, pd.Series(['a', 'b', 'c'], index=index), 'ts_othervalues')

    # only 'ts_values' actually changed in the last changeset
    g = tso.get_group(engine, 'ts_values')
    assert ['ts_values'] == list(g.keys())

    assert """
2017-01-01    2.0
2017-01-02    3.0
2017-01-03    1.0
""".strip() == tso.get(engine, 'ts_values').to_string().strip()

    assert """
2017-01-01    a
2017-01-02    b
2017-01-03    c
""".strip() == tso.get(engine, 'ts_othervalues').to_string().strip()

    # drop the 'celeste' changeset: 'ts_values' goes back to its first state
    assert tso.delete_last_changeset_for(engine, 'ts_values')

    assert """
2017-01-01    1.0
2017-01-02    2.0
2017-01-03    3.0
""".strip() == tso.get(engine, 'ts_values').to_string().strip()

    assert """
2017-01-01    a
2017-01-02    b
2017-01-03    c
""".strip() == tso.get(engine, 'ts_othervalues').to_string().strip()

    # drop the 'babar' changeset: both series are expected to vanish
    assert tso.delete_last_changeset_for(engine, 'ts_values')
    assert tso.get(engine, 'ts_values') is None
    assert tso.get(engine, 'ts_othervalues') is None

    # nothing is left to delete
    assert not tso.delete_last_changeset_for(engine, 'ts_values')

84

85
def test_differential(engine):
    """Exercise successive inserts on the same series names.

    Covers: an identical re-insert, a partial modification, an insert that
    overlaps and extends the stored series, appended points, the resulting
    revision chain in the per-series tables, the registry content, a read
    with ``revision_date`` and the removal of the last changeset.
    """
    # instantiate one time serie handler object
    tso = TimeSerie()

    ts_begin = pd.Series(range(10))
    ts_begin.index = pd.date_range(start=datetime(2010, 1, 1), freq='D', periods=10)
    tso.insert(engine, ts_begin, 'ts_test', 'test')

    assert """
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""".strip() == tso.get(engine, 'ts_test').to_string().strip()

    # re-insert the very same data: the stored series must be unchanged
    # (the emission of a message is not checked here)
    tso.insert(engine, ts_begin, 'ts_test', 'babar')

    assert """
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    3.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    6.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""".strip() == tso.get(engine, 'ts_test').to_string().strip()

    # change two points only
    ts_slight_variation = ts_begin.copy()
    ts_slight_variation.iloc[3] = 0
    ts_slight_variation.iloc[6] = 0
    tso.insert(engine, ts_slight_variation, 'ts_test', 'celeste')

    assert """
2010-01-01    0.0
2010-01-02    1.0
2010-01-03    2.0
2010-01-04    0.0
2010-01-05    4.0
2010-01-06    5.0
2010-01-07    0.0
2010-01-08    7.0
2010-01-09    8.0
2010-01-10    9.0
""".strip() == tso.get(engine, 'ts_test').to_string().strip()

    # a series starting two days later, overlapping the stored one and
    # running past its end
    ts_longer = pd.Series(range(15))
    ts_longer.index = pd.date_range(start=datetime(2010, 1, 3), freq='D', periods=15)
    ts_longer.iloc[1] = 2.48
    ts_longer.iloc[3] = 3.14
    ts_longer.iloc[5] = ts_begin.iloc[7]

    tso.insert(engine, ts_longer, 'ts_test', 'test')

    assert """
2010-01-01     0.00
2010-01-02     1.00
2010-01-03     0.00
2010-01-04     2.48
2010-01-05     2.00
2010-01-06     3.14
2010-01-07     4.00
2010-01-08     7.00
2010-01-09     6.00
2010-01-10     7.00
2010-01-11     8.00
2010-01-12     9.00
2010-01-13    10.00
2010-01-14    11.00
2010-01-15    12.00
2010-01-16    13.00
2010-01-17    14.00
""".strip() == tso.get(engine, 'ts_test').to_string().strip()

    # start testing manual overrides
    ts_begin = pd.Series([2] * 5)
    ts_begin.index = pd.date_range(start=datetime(2010, 1, 1), freq='D', periods=5)
    ts_begin.loc['2010-01-04'] = -1
    tso.insert(engine, ts_begin, 'ts_mixte', 'test')

    # -1 represents bogus upstream data
    assert """
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
""".strip() == tso.get(engine, 'ts_mixte').to_string().strip()

    # refresh all the period + 1 extra data point
    ts_more = pd.Series([2] * 5)
    ts_more.index = pd.date_range(start=datetime(2010, 1, 2), freq='D', periods=5)
    ts_more.loc['2010-01-04'] = -1
    tso.insert(engine, ts_more, 'ts_mixte', 'test')

    assert """
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""".strip() == tso.get(engine, 'ts_mixte').to_string().strip()

    # just append an extra data point
    ts_one_more = pd.Series([3])  # with no intersection with the previous ts
    ts_one_more.index = pd.date_range(start=datetime(2010, 1, 7), freq='D', periods=1)
    tso.insert(engine, ts_one_more, 'ts_mixte', 'test')

    assert """
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""".strip() == tso.get(engine, 'ts_mixte').to_string().strip()

    # each stored revision points to its parent revision
    hist = pd.read_sql('select id, parent from timeserie.ts_test order by id',
                        engine)
    assert """
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
""".strip() == hist.to_string().strip()

    hist = pd.read_sql('select id, parent from timeserie.ts_mixte order by id',
                        engine)
    assert """
   id  parent
0   1     NaN
1   2     1.0
2   3     2.0
""".strip() == hist.to_string().strip()

    # both series are listed in the registry with their table name
    allts = pd.read_sql("select name, table_name from registry "
                        "where name in ('ts_test', 'ts_mixte')",
                        engine)

    assert """
       name          table_name
0   ts_test   timeserie.ts_test
1  ts_mixte  timeserie.ts_mixte
""".strip() == allts.to_string().strip()

    # reading as of "now" gives the latest state
    assert """
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    3.0
""".strip() == tso.get(engine, 'ts_mixte',
                       revision_date=datetime.now()).to_string().strip()

    # test stripping the last diff
    assert tso.delete_last_changeset_for(engine, 'ts_mixte')

    assert """
2010-01-01    2.0
2010-01-02    2.0
2010-01-03    2.0
2010-01-04   -1.0
2010-01-05    2.0
2010-01-06    2.0
""".strip() == tso.get(engine, 'ts_mixte').to_string().strip()
263
264
265


def test_bad_import(engine):
    """Exercise inserts of awkward inputs.

    Covers: a column read from a csv file, an empty series, a series made
    only of nans, a series mixing nans and values, and a read of a name
    that was never inserted.
    """
    # instantiate one time serie handler object
    tso = TimeSerie()

    # read the csv, parse the 'Gas Day' column as day-first dates and use it
    # as the index
    df_result = pd.read_csv(DATADIR / 'test_data.csv')
    df_result['Gas Day'] = df_result['Gas Day'].apply(
        parser.parse, dayfirst=True, yearfirst=False
    )
    df_result.set_index('Gas Day', inplace=True)
    ts = df_result['SC']

    tso.insert(engine, ts, 'SND_SC', 'test')
    result = tso.get(engine, 'SND_SC')
    assert result.dtype == 'float64'

    # insertion of an empty ts: nothing is expected to be stored
    ts = pd.Series(name='truc', dtype='object')
    tso.insert(engine, ts, 'empty_ts', 'test')
    assert tso.get(engine, 'empty_ts') is None

    # nan in ts
    # all nan: nothing is expected to be stored
    ts = pd.Series([np.nan] * 10,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
    tso.insert(engine, ts, 'test_nan', 'test')
    assert tso.get(engine, 'test_nan') is None

    # mixed nan and values: only the valued points are expected back
    ts = pd.Series([np.nan] * 5 + [3] * 5,
                   index=pd.date_range(start=datetime(2010, 1, 10),
                                       freq='D', periods=10), name='truc')
    tso.insert(engine, ts, 'test_nan', 'test')
    result = tso.get(engine, 'test_nan')

    # insert the same series a second time before checking the result
    tso.insert(engine, ts, 'test_nan', 'test')
    result = tso.get(engine, 'test_nan')
    assert """
2010-01-15    3.0
2010-01-16    3.0
2010-01-17    3.0
2010-01-18    3.0
2010-01-19    3.0
""".strip() == result.to_string().strip()

    # get with a name not in the database; the result is not checked, the
    # call is only made to see that it does not raise
    tso.get(engine, 'inexisting_name', 'test')
312
313
314


def test_revision_date(engine):
    """Exercise reads of a series as of a given ``revision_date``.

    Three versions of the same series are inserted while
    ``tshistory.tsio.datetime.now`` is patched to return three consecutive
    days, then the series is read back at several revision dates.
    """
    # instantiate one time serie handler object
    tso = TimeSerie()

    # one insert per day (2015-01-01, -02, -03 at 15:43:23); the version
    # inserted on day N holds the value N on every point
    for day in (1, 2, 3):
        with patch('tshistory.tsio.datetime') as mock_date:
            mock_date.now.return_value = datetime(2015, 1, day, 15, 43, 23)

            ts = pd.Series([day] * 4,
                           index=pd.date_range(start=datetime(2010, 1, 4),
                                               freq='D', periods=4), name='truc')
            tso.insert(engine, ts, 'ts_through_time', 'test')

    # without revision date: the latest version
    ts = tso.get(engine, 'ts_through_time')

    assert """
2010-01-04    3.0
2010-01-05    3.0
2010-01-06    3.0
2010-01-07    3.0
""".strip() == ts.to_string().strip()

    # between the second and the third insert: the second version
    ts = tso.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 2, 18, 43, 23))

    assert """
2010-01-04    2.0
2010-01-05    2.0
2010-01-06    2.0
2010-01-07    2.0
""".strip() == ts.to_string().strip()

    # between the first and the second insert: the first version
    ts = tso.get(engine, 'ts_through_time',
                 revision_date=datetime(2015, 1, 1, 18, 43, 23))

    assert """
2010-01-04    1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
""".strip() == ts.to_string().strip()

    # before the first insert: nothing
    ts = tso.get(engine, 'ts_through_time',
                 revision_date=datetime(2014, 1, 1, 18, 43, 23))

    assert ts is None

376
377
378

def test_snapshots(engine):
    """Exercise snapshot storage with ``_snapshot_interval`` set to 4.

    A series growing by one point per insert is stored ten times; the test
    then checks which revisions carry a snapshot, the size of the stored
    diffs and snapshots, and what ``_find_snapshot`` returns.
    """
    tso = TimeSerie()
    tso._snapshot_interval = 4

    with engine.connect() as cnx:
        for tscount in range(1, 11):
            ts = pd.Series([1] * tscount,
                           index=pd.date_range(datetime(2015, 1, 1),
                                               freq='D', periods=tscount))
            diff = tso.insert(cnx, ts, 'growing', 'babar')
            # each insert adds exactly one point: the last one of the series
            assert diff.index[0] == diff.index[-1] == ts.index[-1]

    # inserting the last series again changes nothing
    diff = tso.insert(engine, ts, 'growing', 'babar')
    assert diff is None

    # revisions expected to carry a snapshot
    df = pd.read_sql("select id from timeserie.growing where snapshot is not null",
                     engine)
    assert """
   id
0   1
1   4
2   8
3  10
""".strip() == df.to_string().strip()

    ts = tso.get(engine, 'growing')
    assert """
2015-01-01    1.0
2015-01-02    1.0
2015-01-03    1.0
2015-01-04    1.0
2015-01-05    1.0
2015-01-06    1.0
2015-01-07    1.0
2015-01-08    1.0
2015-01-09    1.0
2015-01-10    1.0
""".strip() == ts.to_string().strip()

    df = pd.read_sql("select id, diff, snapshot from timeserie.growing order by id", engine)
    # replace each stored value by its length (0 when null) to compare sizes
    for attr in ('diff', 'snapshot'):
        df[attr] = df[attr].apply(lambda x: 0 if x is None else len(x))

    assert """
   id  diff  snapshot
0   1     0        32
1   2    32         0
2   3    32         0
3   4    32       125
4   5    32         0
5   6    32         0
6   7    32         0
7   8    32       249
8   9    32         0
9  10    32       311
""".strip() == df.to_string().strip()

    # the snapshot found is the one of the last revision and it equals the
    # full series
    table = tso._get_ts_table(engine, 'growing')
    snapid, snap = tso._find_snapshot(engine, table, ())
    assert snapid == 10
    assert (ts == snap).all()