Source code for common_datasets.binary_classification._binary_classification_part1

"""
This module contains the binary classification data loaders
"""

import pandas as pd
import numpy as np

from .._io import (read_csv_data, read_arff_data,
                        load_arff_template_binary,
                        prepare_csv_data_template,
                        DataPreprocessor)

__all__= ['load_kddcup_buffer_overflow_vs_back',
            'load_kddcup_guess_passwd_vs_satan',
            'load_kddcup_land_vs_portsweep',
            'load_kddcup_land_vs_satan',
            'load_kddcup_rootkit_imap_vs_back',
            'load_kr_vs_k_one_vs_fifteen',
            'load_kr_vs_k_three_vs_eleven',
            'load_kr_vs_k_zero_one_vs_draw',
            'load_kr_vs_k_zero_vs_eight',
            'load_kr_vs_k_zero_vs_fifteen',
            'load_poker_8_9_vs_5',
            'load_poker_8_9_vs_6',
            'load_poker_8_vs_6',
            'load_poker_9_vs_7',
            'load_shuttle_2_vs_5',
            'load_shuttle_6_vs_2_3',
            'load_shuttle_c0_vs_c4',
            'load_shuttle_c2_vs_c4',
            'load_vehicle0',
            'load_vehicle1',
            'load_vehicle2',
            'load_vehicle3',
            'load_cm1',
            'load_kc1',
            'load_pc1',
            'load_car_good',
            'load_car_vgood',
            'load_cleveland_0_vs_4',
            'load_dermatology_6',
            'load_flaref',
            'load_led7digit_0_2_4_5_6_7_8_9_vs_1',
            'load_lymphography_normal_fibrosis',
            'load_page_blocks_1_3_vs_4',
            'load_vowel0',
            'load_zoo_3',
            'load_haberman',
            'load_iris0',
            'load_new_thyroid1',
            'load_page_blocks0',
            'load_pima',
            'load_german',
            'load_hepatitis',
            'load_hypothyroid',
            'load_satimage',
            'load_spectf',
            'load_sylva',
            'load_segment0',
            'load_wisconsin',
            'load_mammographic',
            'load_bupa',
            'load_monk_2',
            'load_appendicitis',
            'load_saheart',
            'load_australian',
            'load_crx',
            'load_lymphography',
            'load_wdbc',
            'load_ionosphere',
            'load_ada'
#            'load_hiva'
             ]

[docs]def load_kddcup_buffer_overflow_vs_back(): """ Load the kddcup_buffer_overflow_vs_back dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/kddcup-buffer_overflow_vs_back/kddcup-buffer_overflow_vs_back.dat' return load_arff_template_binary(path=path, name="kddcup-buffer_overflow_vs_back", target_label='Class')
[docs]def load_kddcup_guess_passwd_vs_satan(): """ Load the kddcup-guess_passwd_vs_satan dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/kddcup-guess_passwd_vs_satan/kddcup-guess_passwd_vs_satan.dat' return load_arff_template_binary(path=path, name="kddcup-guess_passwd_vs_satan", target_label='Class')
[docs]def load_kddcup_land_vs_portsweep(): """ Load the kddcup-land_vs_portsweep dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/kddcup-land_vs_portsweep/kddcup-land_vs_portsweep.dat' return load_arff_template_binary(path=path, name="kddcup-land_vs_portsweep", target_label='Class')
[docs]def load_kddcup_land_vs_satan(): """ Load the kddcup-land_vs_satan dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/kddcup-land_vs_satan/kddcup-land_vs_satan.dat' return load_arff_template_binary(path=path, name="kddcup-land_vs_satan", target_label='Class')
[docs]def load_kddcup_rootkit_imap_vs_back(): """ Load the kddcup-rootkit-imap_vs_back dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/kddcup-rootkit-imap_vs_back/kddcup-rootkit-imap_vs_back.dat' return load_arff_template_binary(path=path, name="kddcup-rootkit-imap_vs_back", target_label='Class')
[docs]def load_kr_vs_k_one_vs_fifteen(): """ Load the kr_vs_k_one_vs_fifteen dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/kr-vs-k-one_vs_fifteen/kr-vs-k-one_vs_fifteen.dat' return load_arff_template_binary(path=path, name="kr_vs_k_one_vs_fifteen", target_label='Class')
[docs]def load_kr_vs_k_three_vs_eleven(): """ Load the kr-vs-k-three_vs_eleven dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/kr-vs-k-three_vs_eleven/kr-vs-k-three_vs_eleven.dat' return load_arff_template_binary(path=path, name="kr-vs-k-three_vs_eleven", target_label='Class')
[docs]def load_kr_vs_k_zero_one_vs_draw(): """ Load the kr-vs-k-zero-one_vs_draw dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/kr-vs-k-zero-one_vs_draw/kr-vs-k-zero-one_vs_draw.dat' return load_arff_template_binary(path=path, name="kr-vs-k-zero-one_vs_draw", target_label='Class')
[docs]def load_kr_vs_k_zero_vs_eight(): """ Load the kr-vs-k-zero_vs_eight dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/kr-vs-k-zero_vs_eight/kr-vs-k-zero_vs_eight.dat' return load_arff_template_binary(path=path, name="kr-vs-k-zero_vs_eight", target_label='Class')
[docs]def load_kr_vs_k_zero_vs_fifteen(): """ Load the kr-vs-k-zero_vs_fifteen dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/kr-vs-k-zero_vs_fifteen/kr-vs-k-zero_vs_fifteen.dat' return load_arff_template_binary(path=path, name="kr-vs-k-zero_vs_fifteen", target_label='Class')
[docs]def load_poker_8_9_vs_5(): """ Load the poker-8-9_vs_5 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/poker-8-9_vs_5/poker-8-9_vs_5.dat' return load_arff_template_binary(path=path, name="poker-8-9_vs_5", target_label='Class')
[docs]def load_poker_8_9_vs_6(): """ Load the poker-8-9_vs_6 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/poker-8-9_vs_6/poker-8-9_vs_6.dat' return load_arff_template_binary(path=path, name="poker-8-9_vs_6", target_label='Class')
[docs]def load_poker_8_vs_6(): """ Load the poker-8_vs_6 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/poker-8_vs_6/poker-8_vs_6.dat' return load_arff_template_binary(path=path, name="poker-8_vs_6", target_label='Class')
[docs]def load_poker_9_vs_7(): """ Load the poker-9_vs_7 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/poker-9_vs_7/poker-9_vs_7.dat' return load_arff_template_binary(path=path, name="poker-9_vs_7", target_label='Class')
[docs]def load_shuttle_2_vs_5(): """ Load the shuttle-2_vs_5 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/shuttle-2_vs_5/shuttle-2_vs_5.dat' return load_arff_template_binary(path=path, name="shuttle-2_vs_5", target_label='Class')
[docs]def load_shuttle_6_vs_2_3(): """ Load the shuttle-6_vs_2-3 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/shuttle-6_vs_2-3/shuttle-6_vs_2-3.dat' return load_arff_template_binary(path=path, name="shuttle-6_vs_2-3", target_label='Class')
[docs]def load_shuttle_c0_vs_c4(): """ Load the shuttle-c0-vs-c4 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/shuttle-c0-vs-c4/shuttle-c0-vs-c4.dat' return load_arff_template_binary(path=path, name="shuttle-c0-vs-c4", target_label='Class')
[docs]def load_shuttle_c2_vs_c4(): """ Load the shuttle-c2-vs-c4 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/shuttle-c2-vs-c4/shuttle-c2-vs-c4.dat' return load_arff_template_binary(path=path, name="shuttle-c2-vs-c4", target_label='Class')
[docs]def load_vehicle0(): """ Load the vehicle0 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/vehicle0/vehicle0.dat' return load_arff_template_binary(path=path, name="vehicle0", target_label='Class')
[docs]def load_vehicle1(): """ Load the vehicle1 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/vehicle1/vehicle1.dat' return load_arff_template_binary(path=path, name="vehicle1", target_label='Class')
[docs]def load_vehicle2(): """ Load the vehicle2 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/vehicle2/vehicle2.dat' return load_arff_template_binary(path=path, name="vehicle2", target_label='Class')
[docs]def load_vehicle3(): """ Load the vehicle3 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/vehicle3/vehicle3.dat' return load_arff_template_binary(path=path, name="vehicle3", target_label='Class')
[docs]def load_pc1(): """ Load the PC1 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/pc1/pc1.arff' return load_arff_template_binary(path=path, name="PC1", target_label='defects', citation_key='krnn')
[docs]def load_cm1(): """ Load the CM1 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/cm1/cm1.arff.txt' return load_arff_template_binary(path=path, name="CM1", target_label='defects', citation_key='krnn')
[docs]def load_kc1(): """ Load the KC1 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/kc1/kc1.arff.txt' return load_arff_template_binary(path=path, name="KC1", target_label='defects', citation_key='krnn')
[docs]def load_car_good(): """ Load the car_good dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/car-good/car-good.dat' return load_arff_template_binary(path=path, name="car_good", target_label='Class')
[docs]def load_car_vgood(): """ Load the car-vgood dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/car-vgood/car-vgood.dat' return load_arff_template_binary(path=path, name="car-vgood", target_label='Class')
[docs]def load_cleveland_0_vs_4(): """ Load the cleveland-0_vs_4 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/cleveland-0_vs_4/cleveland-0_vs_4_no_null.dat' return load_arff_template_binary(path=path, name="cleveland-0_vs_4", target_label='num')
[docs]def load_dermatology_6(): """ Load the dermatology-6 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/dermatology-6/dermatology-6.dat' return load_arff_template_binary(path=path, name="dermatology-6", target_label='Class')
[docs]def load_flaref(): """ Load the flare-F dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/flare-F/flare-F.dat' return load_arff_template_binary(path=path, name="flare-F", target_label='Class')
[docs]def load_led7digit_0_2_4_5_6_7_8_9_vs_1(): """ Load the led7digit-0-2-4-6-7-8-9_vs_1 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/led7digit-0-2-4-5-6-7-8-9_vs_1/led7digit-0-2-4-5-6-7-8-9_vs_1.dat' return load_arff_template_binary(path=path, name="led7digit-0-2-4-6-7-8-9_vs_1", target_label='number')
[docs]def load_lymphography_normal_fibrosis(): """ Load the lymphography-normal-fibrosis dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/lymphography-normal-fibrosis/lymphography-normal-fibrosis.dat' return load_arff_template_binary(path=path, name="lymphography-normal-fibrosis", target_label='Class')
[docs]def load_page_blocks_1_3_vs_4(): """ Load the page-blocks-1-3_vs_4 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/page-blocks-1-3_vs_4/page-blocks-1-3_vs_4.dat' return load_arff_template_binary(path=path, name="page-blocks-1-3_vs_4", target_label='Class')
[docs]def load_vowel0(): """ Load the vowel0 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/vowel0/vowel0.dat' return load_arff_template_binary(path=path, name="vowel0", target_label='Class')
[docs]def load_zoo_3(): """ Load the zoo-3 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/zoo-3/zoo-3.dat' return load_arff_template_binary(path=path, name="zoo-3", target_label='Class')
[docs]def load_haberman(): """ Load the haberman dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/haberman/haberman.dat' return load_arff_template_binary(path=path, name="haberman", target_label='Class')
[docs]def load_iris0(): """ Load the iris0 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/iris0/iris0.dat' return load_arff_template_binary(path=path, name="iris0", target_label='Class')
[docs]def load_new_thyroid1(): """ Load the new_thyroid1 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/new_thyroid1/new-thyroid1.dat' return load_arff_template_binary(path=path, name="new_thyroid1", target_label='Class')
#def load_new_thyroid2(): # """ # Load the new_thyroid2 dataset # # Returns: # dict: the dataset in sklearn.datasets representation # """ # path = 'data/classification/new_thyroid2/new_thyroid2.dat' # return load_arff_template_binary(path=path, # name="new_thyroid2", # target_label='Class')
[docs]def load_page_blocks0(): """ Load the page_blocks0 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/page-blocks0/page-blocks0.dat' return load_arff_template_binary(path=path, name="new_thyroid2", target_label='Class')
[docs]def load_pima(): """ Load the pima dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/pima/pima.dat' return load_arff_template_binary(path=path, name="pima", target_label='Class')
[docs]def load_segment0(): """ Load the segment0 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/segment0/segment0.dat' return load_arff_template_binary(path=path, name="segment0", target_label='Class')
[docs]def load_wisconsin(): """ Load the wisconsin dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/wisconsin/wisconsin.dat' return load_arff_template_binary(path=path, name="wisconsin", target_label='Class')
[docs]def load_mammographic(): """ Load the mammographic dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/mammographic/mammographic.dat' return load_arff_template_binary(path=path, name="mammographic", target_label='Severity')
[docs]def load_appendicitis(): """ Load the appendicitis dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/appendicitis/appendicitis.dat' return load_arff_template_binary(path=path, name="appendicitis", target_label='Class')
[docs]def load_saheart(): """ Load the saheart dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/saheart/saheart.dat' return load_arff_template_binary(path=path, name="saheart", target_label='Chd')
[docs]def load_australian(): """ Load the australian dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/australian/australian.dat' return load_arff_template_binary(path=path, name="australian", target_label='Class')
[docs]def load_monk_2(): """ Load the monk-2 dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/monk-2/monk-2.dat' return load_arff_template_binary(path=path, name='monk-2', target_label='Class')
[docs]def load_wdbc(): """ Load the wdbc dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/wdbc/wdbc.dat' return load_arff_template_binary(path=path, name="wdbc", target_label='Class')
[docs]def load_ionosphere(): """ Load the ionosphere dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/ionosphere/ionosphere.dat' return load_arff_template_binary(path=path, name="ionosphere", target_label='Class')
#def load_spectfheart(): # """ # Load the spectfheart dataset # # Returns: # dict: the dataset in sklearn.datasets representation # """ # path = 'data/classification/spectfheart/spectfheart.dat' # return load_arff_template_binary(path=path, # name="spectfheart", # target_label='OVERALL_DIAGNOSIS', # revert_target=True)
[docs]def load_bupa(): """ Load the bupa dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/bupa/bupa.dat' return load_arff_template_binary(path=path, name="bupa", target_label='Selector')
[docs]def load_crx(): """ Load the crx dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/crx/crx.dat' return load_arff_template_binary(path=path, name="crx", target_label='Class')
[docs]def load_lymphography(): """ Load the lymphography dataset Returns: dict: the dataset in sklearn.datasets representation """ path = 'data/classification/lymphography/lymphography.dat' dataset_raw, meta= read_arff_data(path) feature_types = {attr: item.type_name for attr, item in meta._attributes.items()} # pylint: disable=protected-access dataset_raw = pd.DataFrame(dataset_raw, columns=list(feature_types.keys())) target_col = dataset_raw.columns[-1] dataset_raw.loc[dataset_raw[target_col] == b'metastases', target_col]= 0 dataset_raw.loc[dataset_raw[target_col] == b'malign_lymph', target_col]= 0 dataset_raw.loc[dataset_raw[target_col] == b'normal', target_col]= 0 dataset_raw.loc[dataset_raw[target_col] == b'fibrosis', target_col]= 1 dataset_raw[target_col]= dataset_raw[target_col].astype(int) dataprep = DataPreprocessor(dataset_raw, feature_types=feature_types, target_label=target_col, name='lymphography', citation_key='keel') dataset = dataprep.get_dataset() return dataset
####### # csv # #######
[docs]def load_ada(): """ Loads the ada dataset Returns: dict: the dataset in sklearn.datasets representation """ data = read_csv_data('data/classification/ada/ada_train.data', sep=' ', header=None) labels = read_csv_data('data/classification/ada/ada_train.labels', header=None) # dummy column replaced by labels due to trailing separator characters data[48] = labels data = data.rename({48: 'target_label'}, axis='columns') data[data == '?'] = np.nan data = data.astype(float) return prepare_csv_data_template(dataset=data, name='ADA', target_label='target_label')
#def load_hiva(): # """ # Loads the hiva dataset # # Returns: # dict: the dataset in sklearn.datasets representation # """ # data = read_csv_data('data/classification/hiva/hiva_train.data', sep=' ', header=None) # labels = read_csv_data('data/classification/hiva/hiva_train.labels', header=None) # # # dummy column replaced by labels due to trailing separator characters # data[48] = labels # data = data.rename({48: 'target_label'}, axis='columns') # # data[data == '?'] = np.nan # data = data.astype(float) # # return prepare_csv_data_template(dataset=data, # name='HIVA', # target_label='target_label') #def load_glass(): # """ # Loads the glass dataset # # Returns: # dict: the dataset in sklearn.datasets representation # """ # dataset = read_csv_data('data/classification/glass/glass.data.txt') # dataset.columns = list(dataset.columns[:-1]) + ['target'] # dataset.loc[dataset['target'] != 3, 'target'] = 0 # # return prepare_csv_data_template(dataset=dataset, # name='glass', # target_label='target')
[docs]def load_hypothyroid(): """ Loads the hypothyroid dataset Returns: dict: the dataset in sklearn.datasets representation """ dataset = read_csv_data('data/classification/hypothyroid/hypothyroid.data.txt') dataset.columns= [str(col) for col in dataset.columns] dataset[dataset == '?'] = np.nan dataset[dataset == 'f'] = 0 dataset[dataset == 't'] = 1 dataset[dataset == 'F'] = 0 dataset[dataset == 'T'] = 1 dataset[dataset == 'n'] = 0 dataset[dataset == 'y'] = 1 dataset[dataset == 'M'] = 0 dataset[dataset == 'F'] = 1 dataset[dataset.columns[1:]] = dataset[dataset.columns[1:]].astype(float) dataset.columns = ['target'] + list(dataset.columns[1:]) return prepare_csv_data_template(dataset=dataset, name='hypothyroid', target_label='target')
def load_sylva(): """ Loads the sylva dataset Returns: dict: the dataset in sklearn.datasets representation """ database_raw= read_csv_data('data/classification/sylva/sylva_train.data', sep= ' ') # removing last column due to trailing whitespaces del database_raw[database_raw.columns[-1]] target= read_csv_data('data/classification/sylva/sylva_train.labels') database_raw['target']= target return prepare_csv_data_template(dataset=database_raw, name='sylva', target_label='target')
[docs]def load_spectf(): """ Loads the spectf dataset Returns: dict: the dataset in sklearn.datasets representation """ db0= read_csv_data('data/classification/spect_f/SPECTF.train.txt') db1= read_csv_data('data/classification/spect_f/SPECTF.test.txt') dataset= pd.concat([db0, db1]) dataset.columns= ['target'] + list(dataset.columns[1:]) return prepare_csv_data_template(dataset=dataset, name='SPECTF', target_label='target')
[docs]def load_hepatitis(): """ Loads the hepatitis dataset Returns: dict: the dataset in sklearn.datasets representation """ dataset= read_csv_data('data/classification/hepatitis/hepatitis.data.txt') dataset.columns= ['target'] + list(dataset.columns[1:]) dataset[dataset == '?'] = np.nan dataset = dataset.astype(float) return prepare_csv_data_template(dataset=dataset, name='hepatitis', target_label='target')
#def load_vehicle(): # """ # Loads the vehicle dataset # # Returns: # dict: the dataset in sklearn.datasets representation # """ # db0= read_csv_data('data/classification/vehicle/xaa.dat.txt', sep= ' ', usecols= range(19)) # db1= read_csv_data('data/classification/vehicle/xab.dat.txt', sep= ' ', usecols= range(19)) # db2= read_csv_data('data/classification/vehicle/xac.dat.txt', sep= ' ', usecols= range(19)) # db3= read_csv_data('data/classification/vehicle/xad.dat.txt', sep= ' ', usecols= range(19)) # db4= read_csv_data('data/classification/vehicle/xae.dat.txt', sep= ' ', usecols= range(19)) # db5= read_csv_data('data/classification/vehicle/xaf.dat.txt', sep= ' ', usecols= range(19)) # db6= read_csv_data('data/classification/vehicle/xag.dat.txt', sep= ' ', usecols= range(19)) # db7= read_csv_data('data/classification/vehicle/xah.dat.txt', sep= ' ', usecols= range(19)) # db8= read_csv_data('data/classification/vehicle/xai.dat.txt', sep= ' ', usecols= range(19)) # # dataset= pd.concat([db0, db1, db2, db3, db4, db5, db6, db7, db8]) # # dataset.columns= list(dataset.columns[:-1]) + ['target'] # dataset.loc[dataset['target'] != 'van', 'target']= 'other' # # return prepare_csv_data_template(dataset=dataset, # name='vehicle', # target_label='target') def load_german(): """ Loads the german dataset Returns: dict: the dataset in sklearn.datasets representation """ dataset = read_csv_data('data/classification/german/german.data-numeric.txt', sep= '\t') dataset.columns= list(dataset.columns[:-1]) + ['target'] return prepare_csv_data_template(dataset=dataset, name='german', target_label='target')
[docs]def load_satimage(): """ Loads the satimage dataset Returns: dict: the dataset in sklearn.datasets representation """ db0= read_csv_data('data/classification/satimage/sat.trn.txt', sep= ' ') db1= read_csv_data('data/classification/satimage/sat.tst.txt', sep= ' ') dataset= pd.concat([db0, db1]) dataset.columns= list(dataset.columns[:-1]) + ['target'] dataset.loc[dataset['target'] != 4, 'target']= 0 return prepare_csv_data_template(dataset=dataset, name='SATIMAGE', target_label='target')