I need help with an attribute error generated by my school's grader.

My task: I need to create a custom transformer to be input into a grader.

The grader passes a list of dictionaries to the predict or predict_proba method of my estimator, not a DataFrame. This means that the model must work with both data types. For this reason, I need to provide a custom ColumnSelectTransformer to use instead of scikit-learn's own ColumnTransformer.

This is my code for the custom transformer that aims to impute null values in the columns provided.

from sklearn.impute import SimpleImputer

# Columns fed to the "simple features" pipeline, in the order they are
# selected by ColumnSelectTransformer.
simple_cols = [
    'BEDCERT',
    'RESTOT',
    'INHOSP',
    'CCRC_FACIL',
    'SFF',
    'CHOW_LAST_12MOS',
    'SPRINKLER_STATUS',
    'EXP_TOTAL',
    'ADJ_TOTAL',
]

class ColumnSelectTransformer(BaseEstimator, TransformerMixin):
    """Pick a fixed list of columns out of tabular input.

    The input may already be a ``pandas.DataFrame``; anything else (for
    example the list of dictionaries the grader sends) is first converted
    with ``pd.DataFrame(X)``. The selected columns are returned as a plain
    array via ``.values``.
    """

    def __init__(self, columns):
        # Stored unmodified under the same name as the constructor argument.
        self.columns = columns

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return the values of ``self.columns`` taken from ``X``."""
        frame = X if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
        selected = frame[self.columns]
        return selected.values

# Feature pipeline: select the simple columns, then replace missing values
# with the per-column mean.
simple_features = Pipeline(steps=[
    ('cst', ColumnSelectTransformer(columns=simple_cols)),
    ('imputer', SimpleImputer(strategy='mean')),
])

I am then tasked to create a new pipeline and fit it with an estimator, and below is my attempt.

from sklearn.linear_model import LinearRegression

# Full model: the feature pipeline (column selection + mean imputation)
# followed by a linear estimator.
# NOTE(review): LinearRegression is a regressor and has no predict_proba
# method, yet the grader wrapper calls model.predict_proba(X) -- this is the
# cause of the AttributeError shown in the traceback. Because the target is
# the boolean `fine_counts > 0`, a classifier that implements predict_proba
# (e.g. sklearn.linear_model.LogisticRegression) is presumably what is
# needed here -- confirm against the assignment.
simple_features_model = Pipeline([
    ('simple', simple_features),
    ('linear', LinearRegression()),
])

# Fit on the raw data; the target is True where fine_counts is positive.
simple_features_model.fit(data, fine_counts > 0)

The pipeline is generated successfully

Pipeline(memory=None,
         steps=[('simple',
                 Pipeline(memory=None,
                          steps=[('cst',
                                  ColumnSelectTransformer(columns=['BEDCERT',
                                                                   'RESTOT',
                                                                   'INHOSP',
                                                                   'CCRC_FACIL',
                                                                   'SFF',
                                                                   'CHOW_LAST_12MOS',
                                                                   'SPRINKLER_STATUS',
                                                                   'EXP_TOTAL',
                                                                   'ADJ_TOTAL'])),
                                 ('imputer',
                                  SimpleImputer(add_indicator=False, copy=True,
                                                fill_value=None,
                                                missing_values=nan,
                                                strategy='mean', verbose=0))],
                          verbose=False)),
                ('linear',
                 LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
                                  normalize=False))],
         verbose=False)

However, when I pass my simple_features_model into my school's grader

def positive_probability(model):
    """Build a function that returns one probability per sample.

    The returned callable forwards its input to ``model.predict_proba`` and
    keeps only the second column (index 1) of the result, which the caller
    treats as the probability of the positive class.
    """
    def predict_proba(X):
        probabilities = model.predict_proba(X)
        # Keep every row, but only the column at index 1.
        return probabilities[:, 1]

    return predict_proba

# Hand the grader a plain function; it calls that function with its own test
# inputs, so the wrapped model must implement predict_proba.
grader.score.ml__simple_features(positive_probability(simple_features_model))

I get the following error

AttributeError                            Traceback (most recent call last)
<ipython-input-87-243f592b48ee> in <module>()
      4     return predict_proba
      5 
----> 6 grader.score.ml__simple_features(positive_probability(simple_features_model))

/opt/conda/lib/python3.7/site-packages/static_grader/grader.py in func(*args, **kw)
     92   def __getattr__(self, method):
     93     def func(*args, **kw):
---> 94       return self(method, *args, **kw)
     95     return func
     96 

/opt/conda/lib/python3.7/site-packages/static_grader/grader.py in __call__(self, question_name, func)
     88       return
     89     test_cases = json.loads(resp.text)
---> 90     test_cases_grading(question_name, func, test_cases)
     91 
     92   def __getattr__(self, method):

/opt/conda/lib/python3.7/site-packages/static_grader/grader.py in test_cases_grading(question_name, func, test_cases)
     40   for test_case in test_cases:
     41     if inspect.isroutine(func):
---> 42       sub_res = func(*test_case['args'], **test_case['kwargs'])
     43     elif not test_case['args'] and not test_case['kwargs']:
     44       sub_res = func

<ipython-input-87-243f592b48ee> in predict_proba(X)
      1 def positive_probability(model):
      2     def predict_proba(X):
----> 3         return model.predict_proba(X)[:, 1]
      4     return predict_proba
      5 

/opt/conda/lib/python3.7/site-packages/sklearn/utils/metaestimators.py in __get__(self, obj, type)
    108                     continue
    109                 else:
--> 110                     getattr(delegate, self.attribute_name)
    111                     break
    112             else:

AttributeError: 'LinearRegression' object has no attribute 'predict_proba'

Related posts

Recent Viewed