#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h> // for mkdir
#include <inttypes.h>
#include <stdint.h>
#include <algorithm> // for max/min
#include <vector> // for sorting_kmers
#include <sys/time.h>

// Default minimal abundance for solidity; main() stores it in nks before the
// min_abundance command-line argument (argv[3]) overrides it.
#define NNKS 4 // default minimal abundance for solidity

// Memory budget in MB. main() derives it from the estimated Bloom filter size
// (at least 1) and passes it to sorting_count() and debloom().
int max_memory; // the most memory one should alloc at any time, in MB
// Passed unchanged to debloom() by main().
int order=0; // in minigraph, don't change it, it should be 0

#include "minia/Bank.h"
#include "minia/Hash16.h"
#include "minia/Set.h"
#include "minia/Pool.h"
#include "minia/Bloom.h"
#include "minia/Debloom.h"
#include "minia/Utils.h"
#include "minia/SortingCount.h"
#include "minia/Terminator.h"
#include "minia/Kmer.h"
#include "minia/GraphOutput.h"
#include "minia/rvalues.h" // for 4bloom

// Estimated genome size in bp, parsed from argv[4] in main() and handed to the
// BranchingTerminator built in construct_linear_seqs().
int64_t genome_size;
// Bloom filter created in main() through bloom_create_bloo1(); it is used by
// construct_linear_seqs() and deleted by main() right after that call.
Bloom * bloo1;

// Output stream for the linear sequences; opened and closed inside
// construct_linear_seqs().
FILE *linear_seqs_file;
// Path of the linear sequences file; main() sets it to prefix + linear_seqs_suffix.
string linear_seqs_name;
// Extension appended to the output prefix to name the linear sequences file.
const string linear_seqs_suffix = ".linearSeqs";

// Output format given to GraphOutput. main() selects 3 for --json and 1 for
// --kissplice; no command-line flag visible in this file selects 2.
int graph_format = 0; // 0 = dot, 1 = kissplice, 2 = xgmml, 3 = json

void construct_linear_seqs()
{

    kmer_type branching_kmer, kmer;
    char kmer_seq[sizeKmer+1];

    BinaryBank *SolidKmers = new BinaryBank(return_file_name(solid_kmers_file),sizeof(kmer_type),0);

    BranchingTerminator *terminator;
    terminator = new BranchingTerminator(SolidKmers,genome_size, bloo1,false_positives);

   /* RandomBranchingTraversal *traversal = new RandomBranchingTraversal(bloo1,false_positives,terminator);
    printf("\n\nWARNING! random traversal\n\n");*/
    SimplePathsTraversal *traversal = new SimplePathsTraversal(bloo1,false_positives,terminator);
    //MonumentTraversal *traversal = new MonumentTraversal(bloo1,false_positives,terminator);

    long long nbNodes = 0;
    long long totalnt=0;
    long long mlenleft=0,mlenright=0;
    int64_t NbBranchingKmer=0;
    long long len_left = 0;
    long long len_right = 0;
    long long contig_len =0;
    long long maxlen=10000000;
    char *left_traversal  = (char *) malloc(maxlen/2*sizeof(char));
    char *right_traversal = (char *) malloc(maxlen/2*sizeof(char));
    char *node          = (char *) malloc(maxlen*sizeof(char));

    linear_seqs_file = fopen((char * )linear_seqs_name.c_str(),"w");

    printf("starting nodes construction\n");
    STARTWALL(nodes);

    while (terminator->next(&branching_kmer))
    {
        while (traversal->get_new_starting_node(branching_kmer,kmer))
        {
            code2seq(kmer,kmer_seq); // convert starting kmer to nucleotide seq
            //printf("new starting node: %s\n",kmer_seq);
            
            // right extension
            len_right = traversal->traverse(kmer,right_traversal,0);
            mlenright= max(len_right,mlenright);

            // left extension, is equivalent to right extension of the revcomp
            len_left = traversal->traverse(kmer,left_traversal,1);
            mlenleft= max(len_left,mlenleft);

            // form the node 
            revcomp_sequence(left_traversal,len_left);
            strcpy(node,left_traversal); // node = revcomp(left_traversal)
            strcat(node,kmer_seq);//               + starting_kmer
            strcat(node,right_traversal);//           + right_traversal

            int node_len=len_left+len_right+sizeKmer;

            // save the node
            fprintf(linear_seqs_file,">%lli__len__%i \n",nbNodes,node_len);
            fprintf(linear_seqs_file,"%s\n",node);
            nbNodes++;
            totalnt+=node_len;
        }

        NbBranchingKmer++;
        if ((NbBranchingKmer%300)==0) fprintf (stderr,"%cLooping through branching kmer n° %lld / %lld     ",13,NbBranchingKmer,terminator->nb_branching_kmers);
    }

    fprintf(stderr, "\n");
    STOPWALL(nodes,"nodes construction");

    // TODO: do another pass of kmers to detect what we missed:
    // - perfectly circular regions without any branching (btw, stop as soon as a branching kmer is detected..)
    // - that's it

    delete terminator;
    fclose(linear_seqs_file);
    free(left_traversal);
    free(right_traversal);
    free(node);
    SolidKmers->close();
}

// Entry point of minigraph.
//
// Usage: PROG reads_file kmer_size min_abundance estimated_genome_size prefix [options]
//
// Pipeline, as implemented below:
//   1. parse the command line and set the global kmer parameters;
//   2. unless --dont-count / --just-graph: count kmers with sorting_count() and
//      strip the abundances from the resulting solid kmers file;
//   3. unless --dont-debloom / --just-graph: run debloom();
//   4. create the Bloom filter and load the false positives;
//   5. write the linear sequences with construct_linear_seqs();
//   6. build and write the graph with GraphOutput.
//
// Returns 0 on success and also after printing the usage text; exits with
// status 1 on an invalid kmer size, a non-positive genome size, or when the
// solid kmers file cannot be renamed.
int main(int argc, char *argv[])
{

    if(argc <  6)
    {
        fprintf (stderr,"usage:\n");
        fprintf (stderr," %s reads_file kmer_size min_abundance estimated_genome_size prefix [--json] [--kissplice] [--4bloom]\n",argv[0]);
        fprintf (stderr,"hints:\n reads_file is either a fasta/fastq/fasta.gz/fastq.gz or a text file containing a reads file at each line\n min_abundance ~ 3\n estimated_genome_size is in bp, does not need to be accurate, only controls memory usage\n prefix is any name you want\n with the --4bloom option the representation with 4 Bloom filters is used instead of the standard one\n");

        return 0;
    }
    int FOUR_BLOOM_VERSION = 0;


    // shortcuts to go directly to assembly using serialized bloom and serialized hash
    int START_FROM_SOLID_KMERS=0; // if = 0, construct the fasta file of solid kmers, if = 1, start directly from that file
    int LOAD_FALSE_POSITIVE_KMERS=0; // if = 0, construct the fasta file of false positive kmers (debloom), if = 1, load that file into the hashtable
    int NO_FALSE_POSITIVES_AT_ALL=0; // if = 0, normal behavior, if = 1, don't load false positives (will be a probabilistic de bruijn graph)
    for (int n_a = 6; n_a < argc ; n_a++)
    {
        if (strcmp(argv[n_a],"--4bloom") == 0)
            FOUR_BLOOM_VERSION = 1;

        if (strcmp(argv[n_a],"--dont-count")==0)
            START_FROM_SOLID_KMERS = 1;

        if (strcmp(argv[n_a],"--dont-debloom")==0)
            LOAD_FALSE_POSITIVE_KMERS = 1;

        if (strcmp(argv[n_a],"--just-graph")==0)
        {
            START_FROM_SOLID_KMERS = 1;
            LOAD_FALSE_POSITIVE_KMERS = 1;
        }

        if (strcmp(argv[n_a],"--json")==0)
            graph_format = 3;

        if (strcmp(argv[n_a],"--kissplice")==0)
            graph_format = 1;
    }

    // The argc guards below always hold after the usage check above; they are
    // kept so that each argument stays tied to its position.

    // kmer size
    sizeKmer=27; // kept odd, because i havnt thought of how to handle palindromes (dont want to stop on them)
    if(argc >=  3)
    {
        sizeKmer = atoi(argv[2]);
        if (sizeKmer%2==0)
        {
            sizeKmer-=1;
            printf("Need odd kmer size to avoid palindromes. I've set kmer size to %d.\n",sizeKmer);
        }
        // Compare as signed ints: the original comparison against a size_t
        // rejected non-positive sizes only through unsigned wrap-around.
        const int kmer_size_limit = (int)(sizeof(kmer_type)*4);
        if (sizeKmer < 1 || sizeKmer >= kmer_size_limit)
        {
            printf("Max kmer size on this compiled version is %d\n",kmer_size_limit-2);

            exit(1);
        }
    }

    kmerMask=(((kmer_type)1)<<(sizeKmer*2))-1;
    double lg2 = log(2);

    if (!FOUR_BLOOM_VERSION)
      NBITS_PER_KMER = log(16*sizeKmer*(lg2*lg2))/(lg2*lg2); // needed to process argv[5]
    else
      NBITS_PER_KMER = rvalues[sizeKmer][1];

    // solidity
    nks =NNKS;
    if(argc >=  4)
    {
        nks = atoi(argv[3]);
    }

    if(argc >=  5)
    {
        genome_size  = atoll(argv[4]);
        if (genome_size <= 0)
        {
            // log2f() of a non-positive value is -inf/NaN and converting that to int is undefined
            fprintf (stderr,"estimated_genome_size must be a positive integer (got \"%s\")\n",argv[4]);
            exit(1);
        }
        int estimated_bloom_size = max( (int)ceilf(log2f(genome_size * NBITS_PER_KMER )), 1);

        uint64_t estimated_nb_FP =  (uint64_t)(genome_size * 4 * powf(0.6,11)); // just indicative

        max_memory = max( (1LL << estimated_bloom_size)/8LL /1024LL/1024LL, 1LL );
        // cast so that the argument really is the long long that %lld expects
        printf("estimated values: nbits Bloom %i, nb FP %lld, max memory %i MB\n",estimated_bloom_size,(long long)estimated_nb_FP,max_memory);
    }

    // output prefix
    if(argc >=  6)
    {
        // NOTE(review): unbounded copy of a user-supplied string; the capacity of
        // prefix is declared outside this file - confirm it and bound this copy.
        strcpy(prefix,argv[5]);
    }


    // cast so that the argument really is the unsigned long that %lu expects
    fprintf (stderr,"taille cell %lu \n", (unsigned long)sizeof(cell<kmer_type>));

    STARTWALL(0);

    Bank *Reads = new Bank(argv[1]);

    // count kmers, write solid kmers + count to disk
    if (!START_FROM_SOLID_KMERS)
    {
        int max_disk_space = 0; // let dsk decide
        int verbose = 0;

        sorting_count(Reads, prefix, max_memory, max_disk_space, true, verbose);

        // convert [solid kmers with count] to [solid kmers without count]
        string solid_kmers_with_count_filename = return_file_name(solid_kmers_file);
        solid_kmers_with_count_filename += "_with_count";
        if (rename(return_file_name(solid_kmers_file),solid_kmers_with_count_filename.c_str()) != 0)
        {
            // carrying on would read from a file that does not exist
            perror("cannot rename the solid kmers file");
            exit(1);
        }
        BinaryBank *SolidKmersWithCount = new BinaryBank((char *)solid_kmers_with_count_filename.c_str(),sizeof(kmer_type),false);
        BinaryBank * SolidKmersWithoutCount = new BinaryBank(return_file_name(solid_kmers_file),sizeof(kmer_type),true);
        uint64_t osef;
        uint_abundance_t abundance;
        kmer_type kmer;
        SolidKmersWithCount->read(&osef, 8); //read the header
        while (SolidKmersWithCount->read_element(&kmer))
        {
            // copy the kmer, then skip the abundance stored right after it
            SolidKmersWithoutCount->write_element(&kmer);
            SolidKmersWithCount->read(&abundance, sizeof(abundance));
        }
        SolidKmersWithCount->close();
        SolidKmersWithoutCount->close();
    }
    delete Reads;

    STARTWALL(buildDBG);
    // debloom, write false positives to disk, insert them into false_positives
    if (! LOAD_FALSE_POSITIVE_KMERS)
    {
        debloom(order, max_memory);
    }

    bloo1 = bloom_create_bloo1((BloomCpt *)NULL);

    if (! NO_FALSE_POSITIVES_AT_ALL)
    {
        // load false positives from disk into false_positives
        if (!FOUR_BLOOM_VERSION)
            false_positives = load_false_positives();
        else
            false_positives = load_false_positives_cascading4();
    }
    else
    {
        // titus mode: no FP's
        dummy_false_positives();
    }
    STOPWALL(buildDBG, "build DBG");

    //////-------------------------------------------------------------------------------------------
    fprintf (stderr,"______________________________________________________ \n");
    fprintf (stderr,"_______________________________________ minigraph_____ \n");
    fprintf (stderr,"______________________________________________________ \n\n");

    //////-------------------------------------------------------------------------------------------

    linear_seqs_name=(prefix+linear_seqs_suffix);

    construct_linear_seqs();

    delete bloo1;

    // start a graph

    GraphOutput graph = GraphOutput(prefix,graph_format);

    graph.load_nodes_extremities(linear_seqs_name);

    printf("starting edges construction\n");
    STARTWALL(edges);

    graph.construct_graph(linear_seqs_name);

    STOPWALL(edges,"edges construction");

    graph.close();


    //////-------------------------------------------------------------------------------------------

    STOPWALL(0,"Total");

    return 0;
}


