# Sebastian Raschka 2014-2024
# mlxtend Machine Learning Library Extensions
#
# A function for loading the open-source Boston Housing dataset.
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

import os

import numpy as np

# Resolve the bundled CSV relative to this module's own location: the
# dataset file sits in the "data" subdirectory next to this source file.
this_dir = os.path.dirname(__file__)
this_filename = os.path.basename(__file__)
DATA_PATH = os.path.join(this_dir, "data", "boston_housing.csv")


def boston_housing_data():
    """Load the Boston Housing dataset as a feature matrix and target vector.

    The data is read from the comma-separated file at `DATA_PATH`. Every
    column except the last is treated as a feature; the last column is the
    regression target.

    Source : https://archive.ics.uci.edu/ml/datasets/Housing
    Number of samples : 506

    Continuous target variable : MEDV
    MEDV = Median value of owner-occupied homes in $1000's

    Dataset Attributes:

        - 1) CRIM      per capita crime rate by town
        - 2) ZN        proportion of residential land zoned for lots over
                       25,000 sq.ft.
        - 3) INDUS     proportion of non-retail business acres per town
        - 4) CHAS      Charles River dummy variable (= 1 if tract bounds
                       river; 0 otherwise)
        - 5) NOX       nitric oxides concentration (parts per 10 million)
        - 6) RM        average number of rooms per dwelling
        - 7) AGE       proportion of owner-occupied units built prior to 1940
        - 8) DIS       weighted distances to five Boston employment centres
        - 9) RAD       index of accessibility to radial highways
        - 10) TAX      full-value property-tax rate per $10,000
        - 11) PTRATIO  pupil-teacher ratio by town
        - 12) B        1000(Bk - 0.63)^2 where Bk is the prop. of b. by town
        - 13) LSTAT    % lower status of the population

    Returns
    --------
    X, y : [n_samples, n_features], [n_samples]
        X is the feature matrix with 506 housing samples as rows
        and 13 feature columns.
        y is a 1-dimensional array holding the continuous target
        variable MEDV, one value per sample.

    Examples
    -----------
    For usage examples, please see
    https://rasbt.github.io/mlxtend/user_guide/data/boston_housing_data/

    """
    # Parse the whole CSV into one 2-D float array (rows are samples).
    table = np.genfromtxt(DATA_PATH, delimiter=",", dtype=float)

    # The final column is the target (MEDV); everything before it is a feature.
    features = table[:, :-1]
    target = table[:, -1]
    return features, target
