Module pvinspect.data.datasets
Provides access to demo datasets
Expand source code
"""Provides access to demo datasets"""
from .image import (
ModuleImageSequence,
ModuleImage,
EL_IMAGE,
ImageSequence,
CellImage,
CellImageSequence,
)
from .io import *
from pathlib import Path
from google_drive_downloader import GoogleDriveDownloader as gdd
from typing import Tuple, Dict
import os
import requests
from zipfile import ZipFile
import logging
_DS_PATH = Path(__file__).parent.absolute() / "datasets"
_DS_KEYS = {
"20191219_poly10x6": "1B5fQPLvStuMvuYJ5CxbzyxfwuWQdfNVE",
"20200728_elpv_labels": "1hK_hViiZ1-rHhvI3yGxpC6DSCXAyAFiJ",
}
_ZIP_DS_URLS = {"elpv": "https://github.com/zae-bayern/elpv-dataset/archive/master.zip"}
def _get_dataset_key(name: str):
if name in _DS_KEYS.keys():
return _DS_KEYS[name]
else:
keys = os.getenv("PVINSPECT_KEYS").split(";")
keys = {x.split(",")[0]: x.split(",")[1] for x in keys}
if name in keys.keys():
return keys[name]
else:
raise RuntimeError(
'The specified dataset "{}" could not be found. Maybe you tried \
to access a protected dataset and didn\'t set PVINSPECT_KEYS environment variable?'
)
def _check_and_download_ds(name: str):
ds_path = Path(__file__).parent.absolute() / "datasets" / name
if not ds_path.is_dir():
logging.info("Data is being downloaded..")
k = _get_dataset_key(name)
ds_path.mkdir(parents=True, exist_ok=False)
gdd.download_file_from_google_drive(k, str(ds_path / "data.zip"), unzip=True)
return ds_path
def _check_and_download_zip_ds(name: str) -> Path:
url = _ZIP_DS_URLS[name]
target = _DS_PATH / name
if not target.is_dir():
logging.info("Data is being downloaded..")
target.mkdir()
r = requests.get(url, allow_redirects=True)
open(target / "data.zip", "wb").write(r.content)
zipf = ZipFile(target / "data.zip")
zipf.extractall(target)
return target
def poly10x6(N: int = 0) -> ModuleImageSequence:
"""Read sequence of 10x6 poly modules
Args:
N (int): Only read first N images
"""
p = _check_and_download_ds("20191219_poly10x6")
return read_module_images(p, EL_IMAGE, True, 10, 6, N=N)
def elpv(N: int = 0) -> ImageSequence:
"""Read images from ELPV dataset
Note:
This dataset is part of the following publication:
Deitsch, Sergiu, et al. "Automatic classification of defective photovoltaic module cells in electroluminescence images."
Solar Energy, Elsevier BV, 2019, 185, 455-468.
Additional labels for defect types are provided by the author of this toolbox.
Args:
N (int): Number of images to return. Defaults to using all images.
Returns:
images: Images from the ELPV dataset with defect type annotations as `Image` meta data
"""
# download and read images
images_path = _check_and_download_zip_ds("elpv") / "elpv-dataset-master" / "images"
seq = read_images(images_path, same_camera=False, modality=EL_IMAGE, N=N)
# download and read labels
labels_path = _check_and_download_ds("20200728_elpv_labels") / "labels.csv"
logging.info("Loading labels..")
labels = pd.read_csv(
labels_path,
delimiter=";",
index_col="filename",
dtype={
"defect probability": float,
"wafer": str,
"crack": bool,
"inactive": bool,
"blob": bool,
"finger": bool,
"testset": bool,
},
).rename(columns={"defect probability": "defect_probability"})
# associate images with labels
def label(img: Image):
l = labels.loc["images/{}".format(img.path.name)]
return l.to_dict()
# read images and labels
seq = seq.meta_from_fn(label, progress_bar=False)
return seq
def caip_dataB() -> Tuple[ModuleImageSequence, ModuleImageSequence, ObjectAnnotations]:
"""Read DataB from CAIP paper (private dataset)
Note:
This dataset is from the following publication:
Hoffmann, Mathis, et al. "Fast and robust detection of solar modules in electroluminescence images."
International Conference on Computer Analysis of Images and Patterns. Springer, Cham, 2019.
Returns:
images1: All modules with shape 10x6
images2: All modules with shape 9x4
annot: Annotations specifying the position of modules
"""
p = _check_and_download_ds("20200616_caip_v2")
images1 = read_module_images(
p / "deitsch_testset" / "10x6",
EL_IMAGE,
False,
10,
6,
allow_different_dtypes=True,
)
images2 = read_module_images(
p / "deitsch_testset" / "9x4",
EL_IMAGE,
False,
9,
4,
allow_different_dtypes=True,
)
annot = load_json_object_masks(p / "deitsch_testset" / "module_locations.json")
return images1, images2, annot
def caip_dataC() -> Tuple[ModuleImageSequence, ObjectAnnotations]:
"""Read DataC from CAIP paper (private dataset)
Note:
This dataset is from the following publication:
Hoffmann, Mathis, et al. "Fast and robust detection of solar modules in electroluminescence images."
International Conference on Computer Analysis of Images and Patterns. Springer, Cham, 2019.
Returns:
images: All modules images
annot: Annotations specifying the position of modules
"""
p = _check_and_download_ds("20200616_caip_v2")
annot = load_json_object_masks(p / "multiple" / "module_locations.json")
return read_module_images(p / "multiple", EL_IMAGE, True, 10, 6), annot
def caip_dataD() -> Tuple[ModuleImageSequence, ObjectAnnotations]:
"""Read DataC from CAIP paper (private dataset)
Note:
This dataset is from the following publication:
Hoffmann, Mathis, et al. "Fast and robust detection of solar modules in electroluminescence images."
International Conference on Computer Analysis of Images and Patterns. Springer, Cham, 2019.
Returns:
images: All modules images
annot: Annotations specifying the position of modules
"""
p = _check_and_download_ds("20200616_caip_v2")
annot = load_json_object_masks(p / "rotated" / "module_locations.json")
return read_module_images(p / "rotated", EL_IMAGE, True, 10, 6), annot
def stitching_demo(N: int = 0) -> List[Tuple[Image, Image]]:
"""Data to demonstrate stitching capabilities
Args:
N (int): Number of image pairs that is returened
Returns:
images: List of image pairs
"""
result = list()
images = poly10x6(N)
for image in images:
height = image.shape[0]
# split
img0 = Image(image.data[0 : int(height * 2 // 3)])
img1 = Image(image.data[int(height // 3) :])
result.append((img0, img1))
return result
def calibration_ipv40CCD_FF(N: int = 0) -> Dict[str, ImageSequence]:
"""Flat-field calibration data for ipv40CCD (private dataset)
Args:
N (int): Number of images per excitation
Returns:
images: Dict with excitation as key and images
"""
p = _check_and_download_ds("20200303_calibration_iPV40CCD")
res = dict()
for d in p.glob("FF*"):
key = d.name.split("_")[1]
seq = read_images(path=d, same_camera=False, N=N)
res[key] = seq
return res
def calibration_ipv40CCD_distortion(N: int = 0) -> ImageSequence:
"""Lens calibration data for ipv40CCD (private dataset)
Args:
N (int): Number of images
Returns:
images: Sequence of images
"""
p = _check_and_download_ds("20200303_calibration_iPV40CCD")
return read_images(path=p / "distortion", same_camera=True, N=N)
def multi_module_detection(N: int = 0) -> Tuple[ObjectAnnotations, ImageSequence]:
"""Dataset for multi module detection (private dataset)
Args:
N (int): Number of images
Returns:
anns: Dict of annotations by image
imgs: Sequence of images
"""
p = _check_and_download_ds("20200331_multi_module_detection")
imgs = read_images(
path=p, same_camera=False, N=N, pattern="**/*.png", modality=EL_IMAGE
)
anns = load_json_object_masks(path=p / "labels.json")
return anns, imgs
Functions
def caip_dataB() ‑> Tuple[ModuleImageSequence, ModuleImageSequence, Dict[str, List[Tuple[str, shapely.geometry.polygon.Polygon]]]]
-
Read DataB from CAIP paper (private dataset)
Note
This dataset is from the following publication: Hoffmann, Mathis, et al. "Fast and robust detection of solar modules in electroluminescence images." International Conference on Computer Analysis of Images and Patterns. Springer, Cham, 2019.
Returns
images1
- All modules with shape 10x6
images2
- All modules with shape 9x4
annot
- Annotations specifying the position of modules
Expand source code
def caip_dataB() -> Tuple[ModuleImageSequence, ModuleImageSequence, ObjectAnnotations]: """Read DataB from CAIP paper (private dataset) Note: This dataset is from the following publication: Hoffmann, Mathis, et al. "Fast and robust detection of solar modules in electroluminescence images." International Conference on Computer Analysis of Images and Patterns. Springer, Cham, 2019. Returns: images1: All modules with shape 10x6 images2: All modules with shape 9x4 annot: Annotations specifying the position of modules """ p = _check_and_download_ds("20200616_caip_v2") images1 = read_module_images( p / "deitsch_testset" / "10x6", EL_IMAGE, False, 10, 6, allow_different_dtypes=True, ) images2 = read_module_images( p / "deitsch_testset" / "9x4", EL_IMAGE, False, 9, 4, allow_different_dtypes=True, ) annot = load_json_object_masks(p / "deitsch_testset" / "module_locations.json") return images1, images2, annot
def caip_dataC() ‑> Tuple[ModuleImageSequence, Dict[str, List[Tuple[str, shapely.geometry.polygon.Polygon]]]]
-
Read DataC from CAIP paper (private dataset)
Note
This dataset is from the following publication: Hoffmann, Mathis, et al. "Fast and robust detection of solar modules in electroluminescence images." International Conference on Computer Analysis of Images and Patterns. Springer, Cham, 2019.
Returns
images
- All modules images
annot
- Annotations specifying the position of modules
Expand source code
def caip_dataC() -> Tuple[ModuleImageSequence, ObjectAnnotations]: """Read DataC from CAIP paper (private dataset) Note: This dataset is from the following publication: Hoffmann, Mathis, et al. "Fast and robust detection of solar modules in electroluminescence images." International Conference on Computer Analysis of Images and Patterns. Springer, Cham, 2019. Returns: images: All modules images annot: Annotations specifying the position of modules """ p = _check_and_download_ds("20200616_caip_v2") annot = load_json_object_masks(p / "multiple" / "module_locations.json") return read_module_images(p / "multiple", EL_IMAGE, True, 10, 6), annot
def caip_dataD() ‑> Tuple[ModuleImageSequence, Dict[str, List[Tuple[str, shapely.geometry.polygon.Polygon]]]]
-
Read DataC from CAIP paper (private dataset)
Note
This dataset is from the following publication: Hoffmann, Mathis, et al. "Fast and robust detection of solar modules in electroluminescence images." International Conference on Computer Analysis of Images and Patterns. Springer, Cham, 2019.
Returns
images
- All modules images
annot
- Annotations specifying the position of modules
Expand source code
def caip_dataD() -> Tuple[ModuleImageSequence, ObjectAnnotations]: """Read DataC from CAIP paper (private dataset) Note: This dataset is from the following publication: Hoffmann, Mathis, et al. "Fast and robust detection of solar modules in electroluminescence images." International Conference on Computer Analysis of Images and Patterns. Springer, Cham, 2019. Returns: images: All modules images annot: Annotations specifying the position of modules """ p = _check_and_download_ds("20200616_caip_v2") annot = load_json_object_masks(p / "rotated" / "module_locations.json") return read_module_images(p / "rotated", EL_IMAGE, True, 10, 6), annot
def calibration_ipv40CCD_FF(N: int = 0) ‑> Dict[str, ImageSequence]
-
Flat-field calibration data for ipv40CCD (private dataset)
Args
N
:int
- Number of images per excitation
Returns
images
- Dict with excitation as key and images
Expand source code
def calibration_ipv40CCD_FF(N: int = 0) -> Dict[str, ImageSequence]: """Flat-field calibration data for ipv40CCD (private dataset) Args: N (int): Number of images per excitation Returns: images: Dict with excitation as key and images """ p = _check_and_download_ds("20200303_calibration_iPV40CCD") res = dict() for d in p.glob("FF*"): key = d.name.split("_")[1] seq = read_images(path=d, same_camera=False, N=N) res[key] = seq return res
def calibration_ipv40CCD_distortion(N: int = 0) ‑> ImageSequence
-
Lens calibration data for ipv40CCD (private dataset)
Args
N
:int
- Number of images
Returns
images
- Sequence of images
Expand source code
def calibration_ipv40CCD_distortion(N: int = 0) -> ImageSequence: """Lens calibration data for ipv40CCD (private dataset) Args: N (int): Number of images Returns: images: Sequence of images """ p = _check_and_download_ds("20200303_calibration_iPV40CCD") return read_images(path=p / "distortion", same_camera=True, N=N)
def elpv(N: int = 0) ‑> ImageSequence
-
Read images from ELPV dataset
Note
This dataset is part of the following publication: Deitsch, Sergiu, et al. "Automatic classification of defective photovoltaic module cells in electroluminescence images." Solar Energy, Elsevier BV, 2019, 185, 455-468.
Additional labels for defect types are provided by the author of this toolbox.
Args
N
:int
- Number of images to return. Defaults to using all images.
Returns
images
- Images from the ELPV dataset with defect type annotations as
Image
meta data
Expand source code
def elpv(N: int = 0) -> ImageSequence: """Read images from ELPV dataset Note: This dataset is part of the following publication: Deitsch, Sergiu, et al. "Automatic classification of defective photovoltaic module cells in electroluminescence images." Solar Energy, Elsevier BV, 2019, 185, 455-468. Additional labels for defect types are provided by the author of this toolbox. Args: N (int): Number of images to return. Defaults to using all images. Returns: images: Images from the ELPV dataset with defect type annotations as `Image` meta data """ # download and read images images_path = _check_and_download_zip_ds("elpv") / "elpv-dataset-master" / "images" seq = read_images(images_path, same_camera=False, modality=EL_IMAGE, N=N) # download and read labels labels_path = _check_and_download_ds("20200728_elpv_labels") / "labels.csv" logging.info("Loading labels..") labels = pd.read_csv( labels_path, delimiter=";", index_col="filename", dtype={ "defect probability": float, "wafer": str, "crack": bool, "inactive": bool, "blob": bool, "finger": bool, "testset": bool, }, ).rename(columns={"defect probability": "defect_probability"}) # associate images with labels def label(img: Image): l = labels.loc["images/{}".format(img.path.name)] return l.to_dict() # read images and labels seq = seq.meta_from_fn(label, progress_bar=False) return seq
def multi_module_detection(N: int = 0) ‑> Tuple[Dict[str, List[Tuple[str, shapely.geometry.polygon.Polygon]]], ImageSequence]
-
Dataset for multi module detection (private dataset)
Args
N
:int
- Number of images
Returns
anns
- Dict of annotations by image
imgs
- Sequence of images
Expand source code
def multi_module_detection(N: int = 0) -> Tuple[ObjectAnnotations, ImageSequence]: """Dataset for multi module detection (private dataset) Args: N (int): Number of images Returns: anns: Dict of annotations by image imgs: Sequence of images """ p = _check_and_download_ds("20200331_multi_module_detection") imgs = read_images( path=p, same_camera=False, N=N, pattern="**/*.png", modality=EL_IMAGE ) anns = load_json_object_masks(path=p / "labels.json") return anns, imgs
def poly10x6(N: int = 0) ‑> ModuleImageSequence
-
Read sequence of 10x6 poly modules
Args
N
:int
- Only read first N images
Expand source code
def poly10x6(N: int = 0) -> ModuleImageSequence: """Read sequence of 10x6 poly modules Args: N (int): Only read first N images """ p = _check_and_download_ds("20191219_poly10x6") return read_module_images(p, EL_IMAGE, True, 10, 6, N=N)
def stitching_demo(N: int = 0) ‑> List[Tuple[Image, Image]]
-
Data to demonstrate stitching capabilities
Args
N
:int
- Number of image pairs that is returened
Returns
images
- List of image pairs
Expand source code
def stitching_demo(N: int = 0) -> List[Tuple[Image, Image]]: """Data to demonstrate stitching capabilities Args: N (int): Number of image pairs that is returened Returns: images: List of image pairs """ result = list() images = poly10x6(N) for image in images: height = image.shape[0] # split img0 = Image(image.data[0 : int(height * 2 // 3)]) img1 = Image(image.data[int(height // 3) :]) result.append((img0, img1)) return result