# -*- coding: utf-8 -*-
"""
Created on Sat Mar  2 22:22:27 2019

THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTY OF FITNESS
FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OF THE SOFTWARE.

This script was submitted together with a data file called
"Crocodile_genotypes.csv". That file must be placed in the same
folder as this Python script in order for the program to run.
"""

import numpy as np

class Parents:

    """The two hypothetical parents of a brood.

    The model assumes that these two parents are the only contributors of
    genetic material to the brood. Their candidate genotypes (strictly
    speaking: allelotypes) on the locus under consideration are refined by
    looking at the brood's hatchlings one after the other. A hatchling whose
    genotype cannot be produced by any candidate pair of parental genotypes
    hints at a contribution from a third parent.

    Attributes:
    ----------
    genotypes: integer numpy array indexed as
        [candidate, parent, allele slot]. Each entry along the first axis is
        one candidate pair of parental genotypes. For example

            array([
            [[116 122]
             [126   0]],

            [[116 126]
             [126 122]]
            ])

        says that the parents are either [116 122] and [126 0], or
        [116 126] and [126 122]. The value 0 is a wildcard: any allele may
        sit in that slot.

    has_hatchling: boolean; True once at least one hatchling has been
        assigned to this Parents object.


    Methods:
    -------
    allele_present(allele, pos_genotype_index, parent_index)
        params:
            allele: identifier of an allele on the locus (here the
            nucleotide base length).

            pos_genotype_index: which candidate pair of parental genotypes
            to inspect (first axis of .genotypes). None means all
            candidates.

            parent_index: which of the two parents to inspect (0 or 1).
            None means both parents.

        returns:
            boolean; True when the allele is explicitly listed in the
            selected part of .genotypes.

    allele_possibly_present(allele, pos_genotype_index, parent_index)
        params:
            same as for allele_present.

        returns:
            boolean; True when the allele is explicitly listed in the
            selected part of .genotypes, or when that part still contains
            a wildcard (0) slot that could hold it.

    """
    def __init__(self):
        self.has_hatchling = False
        # A single candidate in which every allele slot is still a wildcard.
        self.genotypes = np.zeros((1, 2, 2), dtype=int)

    def _selection(self, pos_genotype_index, parent_index):
        """Return the part of .genotypes addressed by the two indices.

        An index of None selects the whole corresponding axis.
        """
        candidate_sel = slice(None) if pos_genotype_index is None \
            else pos_genotype_index
        parent_sel = slice(None) if parent_index is None else parent_index
        return self.genotypes[candidate_sel, parent_sel, :]

    def allele_present(self, allele, pos_genotype_index=None,
                       parent_index=None):
        selected = self._selection(pos_genotype_index, parent_index)
        return allele in selected

    def allele_possibly_present(self, allele, pos_genotype_index=None,
                                parent_index=None):
        selected = self._selection(pos_genotype_index, parent_index)
        # A wildcard slot (0) can stand in for any allele.
        return allele in selected or 0 in selected
    
    
class Hatchling:

    """This class represents a Hatchling.

    Attributes:
    ----------
    genotype: A numpy array containing the genotype of the hatchling at the
        current locus. An example is array([276, 294]). The value
        array([-1, -1]) means that no genotype was recorded on this locus.

    parents: a Parents object representing the two hypothetical parents of
        the brood that the hatchling belongs to.

    brood: a Brood object representing the brood a hatchling belongs to.

    locus: An int that acts as unique locus identifier.

    name: The hatchling's name. The name is unique within a brood, but there
        may be duplicates among broods.


    Methods:
    -------
    are_parents_possible(based_on, pos_genotype_ind):
        params:
            based_on: can be 'allele_possibly_present' or 'allele_present'.
            Any other value makes the method return False (unless the
            hatchling has no data, see below).

            pos_genotype_ind: index of the candidate pair of parental
            genotypes (on the current locus) to check. None checks every
            candidate.

        returns:
            boolean possible; whether the hypothetical parents are possible,
            given the genotype (on the current locus) of the hatchling.
            A hatchling without data ([-1, -1]) is compatible with anything,
            so True is returned.


    update_parents_genotypes():
        Update the hypothetical parents' genotype (on the current locus),
        given the genotype of the hatchling. Such an update may bring about
        no change in the parents' genotype if the hatchling's genotype can
        already be reproduced by the alleles present in the parents'
        genotype. If no candidate pair of parental genotypes can produce
        the hatchling, the parents are left untouched.

    @classmethod
    load_data(filename):
        Loads the data from the path given by 'filename'. The data should be
        saved as .csv. An example of such a data file should be disseminated
        with this script.

        returns:
            (dat, loci, brood, hatchling_name, hatchling_ID) where dat is a
            numpy array with one row of allele values per hatchling (-1 for
            empty cells), loci is the list of locus numbers parsed from the
            header, and the remaining three are per-hatchling lists (brood
            number, hatchling number, 1-based line number in the file).

    """

    def __init__(self, name, locus, brood, genotype, parents=None):
        self.genotype = genotype
        if parents is None:
            parents = Parents()
        self.parents = parents
        self.brood = brood
        self.locus = locus
        self.name = name

    def are_parents_possible(self, based_on='allele_possibly_present',
                             pos_genotype_ind=None):
        # No laboratory data for this hatchling on the locus: it cannot
        # contradict any parental genotype.
        if np.all(self.genotype == np.array([-1, -1])):
            return True

        if based_on == 'allele_possibly_present':
            check = self.parents.allele_possibly_present
        elif based_on == 'allele_present':
            check = self.parents.allele_present
        else:
            return False

        n_candidates, n_parents = self.parents.genotypes.shape[:2]
        for jj in range(n_candidates):
            if pos_genotype_ind is not None and jj != pos_genotype_ind:
                continue
            for oo in range(n_parents):
                # One allele must come from parent oo, the other one from
                # the remaining parent (1 - oo).
                if check(self.genotype[0], jj, oo) and \
                   check(self.genotype[1], jj, 1 - oo):
                    return True
        return False

    def update_parents_genotypes(self):
        parents = self.parents
        # If no data was recorded for the hatchling on the locus (i.e., it
        # was assigned [-1,-1]), one has no information about the parental
        # genotype.
        if np.all(self.genotype == np.array([-1, -1])):
            return

        # Drop the candidate parental genotypes that could not possibly
        # produce the hatchling. If parents.genotypes was
        # [[[a,b],[c,d]] , [[e,f],[g,h]]] and the hatchling has genotype
        # [a,c], [[e,f],[g,h]] is removed.
        # All candidates are evaluated BEFORE any of them is removed, so the
        # indices cannot shift underneath the check.
        keep = [self.are_parents_possible(pos_genotype_ind=jj)
                for jj in range(parents.genotypes.shape[0])]
        if not any(keep):
            # No candidate explains this hatchling (evidence for a third
            # parent); there is nothing sensible to update.
            return
        if not all(keep):
            parents.genotypes = parents.genotypes[np.array(keep, dtype=bool)]

        if not parents.has_hatchling:
            # First informative hatchling: each parent contributed one of
            # its alleles.
            parents.has_hatchling = True
            parents.genotypes[0, 0, 0] = self.genotype[0]
            parents.genotypes[0, 1, 0] = self.genotype[1]

        if not self.are_parents_possible(based_on='allele_present'):
            if parents.genotypes[0, 0, 1] == 0 and \
               parents.genotypes[0, 1, 1] == 0:
                # If the parental genotype is [[[a,0],[b,0]]] and the
                # hatchling is [c,d], make the parental genotype
                # [[[a,c],[b,d]] , [[a,d],[b,c]]].
                parents.genotypes = np.append(
                    parents.genotypes, [parents.genotypes[0, :, :]], axis=0)
                parents.genotypes[0, :, 1] = self.genotype
                parents.genotypes[1, :, 1] = self.genotype[::-1]

                # [[[a,a],[b,c]]] --> [[[a,0],[b,c]]]
                for ii in range(parents.genotypes.shape[0]):
                    for oo in range(parents.genotypes.shape[1]):
                        if parents.genotypes[ii, oo, 1] \
                                == parents.genotypes[ii, oo, 0]:
                            parents.genotypes[ii, oo, 1] = 0

                # [[[a,b],[c,d]] , [[a,b],[c,d]]] --> [[[a,b],[c,d]]]
                if np.all(parents.genotypes[0, :, :]
                          == parents.genotypes[1, :, :]):
                    parents.genotypes = parents.genotypes[:1, :, :]

            else:
                for ii in range(parents.genotypes.shape[0]):
                    # (parent rows, allele columns) of the wildcard slots
                    # of candidate ii.
                    zerind = np.where(parents.genotypes[ii, :, :] == 0)
                    for jj in range(2):
                        # If the parental genotype is [[[a,b],[c,0]]] and
                        # the hatchling is [b,d], make the parental
                        # genotype [[[a,b],[c,d]]].
                        if self.genotype[jj] not in \
                                parents.genotypes[ii, zerind[0], :] and \
                                self.genotype[1 - jj] in \
                                parents.genotypes[ii, 1 - zerind[0], :]:
                            parents.genotypes[ii, zerind[0], zerind[1]] \
                                = self.genotype[jj]

    @classmethod
    def load_data(cls, filename):
        brood = []
        hatchling_name = []
        hatchling_ID = []
        dat = []
        loci = []
        with open(filename, 'r') as f:
            for ii, line in enumerate(f):
                fields = line.rstrip('\r\n').split(',')
                if ii == 0:
                    # Header: two leading label columns, then two columns
                    # per locus named like 'Locus<number>_<suffix>'. Only
                    # the first column of each pair is parsed.
                    for ll in fields[2::2]:
                        loci.append(int(ll.split('_')[0].strip('Locus')))
                    continue

                if not any(field.strip() for field in fields):
                    # Blank line (e.g. a trailing newline at the end of the
                    # file): nothing to parse.
                    continue

                hatchling_ID.append(ii)
                brood.append(int(fields[0]))
                hatchling_name.append(int(fields[1]))

                # Empty cells mean "no data" and are encoded as -1.
                dat.append([-1 if dd.strip() == '' else int(dd)
                            for dd in fields[2:]])

        dat = np.array(dat)

        return dat, loci, brood, hatchling_name, hatchling_ID
                    
            
class Brood:

    """This class represents a brood.

    Attributes:
    ----------
    hatchlings: A list of hatchlings (Hatchling objects) belonging to the
        brood.

    name: The brood's name.

    parents: a Parents object representing the two hypothetical parents
        contributing to the brood.


    Methods:
    -------
    add_hatchling(hatchling):
        Adds a hatchling to the brood.

    is_single_mother_possible():
        returns:
            boolean possible: whether it is possible that only one mother
            contributed to all genetic material in a brood (on the current
            locus). A brood without any hatchling that has data on the
            locus offers no evidence against a single mother, so True is
            returned in that case.


    """

    def __init__(self, brood_name):
        self.hatchlings = []
        self.name = brood_name
        self.parents = Parents()

    def add_hatchling(self, hatchling):
        self.hatchlings.append(hatchling)

    def is_single_mother_possible(self):
        # Pick the first hatchling with data on the locus as reference
        # (hatchlings without information carry -1 in their genotype).
        reference = None
        for hatchling in self.hatchlings:
            if -1 not in hatchling.genotype:
                reference = hatchling.genotype
                break
        if reference is None:
            # Empty brood, or no hatchling has data on this locus.
            return True

        # Make a list of all the unique alleles in the brood.
        alleles_in_brood = []
        for bb in self.hatchlings:
            for al in bb.genotype:
                if al not in alleles_in_brood:
                    alleles_in_brood.append(al)

        # The brood [[a,b], [c,c], [a,d], [c,e]] may have the same mother
        # because all hatchlings contain either a or c (the mother's
        # genotype would have been [a,c]). A candidate mother is built from
        # one allele of the reference hatchling plus any allele seen in the
        # brood; hatchlings without data never rule a candidate out.
        for al in alleles_in_brood:
            for from_reference in (reference[0], reference[1]):
                if all(from_reference in bb.genotype
                       or al in bb.genotype
                       or -1 in bb.genotype
                       for bb in self.hatchlings):
                    return True

        return False
                    


if __name__ == '__main__':
    # Script entry point: for every locus, rebuild the broods hatchling by
    # hatchling, collect evidence for a third parent and for more than one
    # mother, then print a report.
    dat, loci, brood_names, hatchling_names, hatchling_IDs =\
    Hatchling.load_data('Crocodile_genotypes.csv')

    output_3_parents = []
    output_single_mother = []

    for ll, lo in enumerate(loci):
        # Broods (and their hypothetical parents) are rebuilt per locus.
        broods = {}
        for ii, (brood_name, hatchling_name) in \
                enumerate(zip(brood_names, hatchling_names)):
            brood = broods.get(brood_name)
            if brood is None:
                brood = Brood(brood_name)
                broods[brood_name] = brood

            parents = brood.parents
            # Columns 2*ll and 2*ll + 1 of dat hold the two alleles of
            # locus number ll.
            hatchling = Hatchling(name=hatchling_name, locus=lo,
                                  brood=brood,
                                  genotype=dat[ii, 2*ll : 2*ll+2],
                                  parents=parents)

            parents_possible = hatchling.are_parents_possible()

            if parents_possible:
                hatchling.update_parents_genotypes()
            else:
                # Store copies: the parental array is later modified in
                # place by other hatchlings, which would otherwise rewrite
                # the genotypes reported here.
                output_3_parents.append(
                    [brood.name,
                     [gntp.copy() for gntp in hatchling.parents.genotypes],
                     lo,
                     np.array([ht.name for ht in brood.hatchlings]),
                     hatchling.name,
                     hatchling.genotype])

            brood.add_hatchling(hatchling)

        for brood in broods.values():
            # The message does not mention the locus, so each brood is
            # reported only once.
            if not brood.is_single_mother_possible() and \
               brood.name not in output_single_mother:
                output_single_mother.append(brood.name)

    three_parents_msg = ('There is evidence that more than two parents'
                         ' contributed to brood {}, since the hypothetical'
                         ' parents must have genotypes \n {}\n'
                         'on locus {} (as determined from hatchlings {}),'
                         ' but hatchling {} has genotype {}.\n\n')
    for o3p in output_3_parents:
        # Alternative candidate pairs of parental genotypes are listed
        # separated by "or".
        o3p[1] = '\n or \n '.join(str(gntp) for gntp in o3p[1])
        print(three_parents_msg.format(*o3p))

    if not output_3_parents:
        print('There is no evidence for more than two parents contributing'
              ' to any of the broods')

    single_mother_msg = ('There is evidence that more than one mother'
                         ' contributed genetically to broods {}.')
    for osd in output_single_mother:
        print(single_mother_msg.format(osd))

    if not output_single_mother:
        print('There is no evidence that more than one mother contributed'
              ' genetically to any of the broods.')
            
