-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2_data_splitting.py
23 lines (19 loc) · 923 Bytes
/
2_data_splitting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.brain as fob
from sympy.core.random import shuffle
dataset_name = "photo_album"

# Name of the persistent dataset that receives the random subset.
subset_name = "play_photos"

# Number of randomly chosen samples copied into the subset.
subset_size = 300

# This script takes a random sample of the original dataset and creates
# a new dataset containing those samples (300 by default).
# For real work we would probably just tag the data instead, e.g. as
# training, test and validation.


def create_random_subset(source_name, target_name, sample_count, seed=None):
    """Clone a random subset of a FiftyOne dataset into a new persistent dataset.

    Args:
        source_name: name of the existing dataset to sample from.
        target_name: name given to the newly created dataset.
        sample_count: maximum number of samples to copy; must not be negative.
        seed: optional random seed forwarded to ``shuffle()`` so the same
            subset can be reproduced. ``None`` gives a different subset on
            every run.

    Returns:
        The object returned by ``clone()`` (per the FiftyOne documentation, a
        new ``Dataset`` that is independent of the source dataset).

    Raises:
        ValueError: if ``sample_count`` is negative.
    """
    # Validate before touching the database so bad input fails fast.
    if sample_count < 0:
        raise ValueError(
            f"sample_count must be non-negative, got {sample_count}"
        )

    source = fo.load_dataset(source_name)

    # A DatasetView is a shallow copy of the original dataset. This means that,
    # if the dataset is persisted, any data or schema changes made through the
    # view are persisted to the original dataset. To avoid affecting the
    # original dataset, the view must be cloned into a dataset of its own.
    shuffled_view = source.shuffle(seed=seed)

    # clone() with persistent=True already stores the new dataset; save() is
    # only needed after editing dataset-level information, so it is not
    # called here.
    # NOTE(review): clone() presumably fails if ``target_name`` is already
    # taken - confirm, and delete the old dataset first when re-running.
    return shuffled_view[0:sample_count].clone(target_name, persistent=True)


if __name__ == "__main__":
    print("starting")
    create_random_subset(dataset_name, subset_name, subset_size)
    print("finished")