[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[creduce-bugs] clang-delta crash report



To creduce developers. Thanks for creating a great tool! 


Here is a bug report. This is with clang 3.8, creduce 2.4, 

Command:
/tools/local/creduce-2.4.0/bin/creduce --sanitize  --verbose --save-temps reduce_crash.sh /net/pc163.smi.local/local_disk/mvillmow/sw/tools/pviz/src/control_flow_analysis.cpp
successfully checked prereqs for pass_blank
successfully checked prereqs for pass_clang_binsrch
successfully checked prereqs for pass_lines
successfully checked prereqs for pass_special
successfully checked prereqs for pass_ternary
successfully checked prereqs for pass_balanced
successfully checked prereqs for pass_clang
successfully checked prereqs for pass_peep
successfully checked prereqs for pass_ints
successfully checked prereqs for pass_indent
successfully checked prereqs for pass_clex

===< 14664 >===
running 8 interestingness test(s) in parallel
INITIAL PASSES

===< pass_blank :: 0 >===
failure
[0 pass_blank :: 0 s:0 f:1] 
===< pass_clang_binsrch :: replace-function-def-with-decl >===
initial granularity = 76
TRANSFORM: index = 1, chunk = 76, instances = 76
"/tools/local/creduce-2.4.0/libexec/clang_delta" --transformation=replace-function-def-with-decl --counter=1 --to-counter=76 /net/pc163.smi.local/local_disk/mvillmow/sw/tools/pviz/src/control_flow_analysis.cpp
ADVANCE: index = 77, chunk = 76
TRANSFORM: index = 77, chunk = 76, instances = 76
granularity = 38
TRANSFORM: index = 1, chunk = 38, instances = 76
"/tools/local/creduce-2.4.0/libexec/clang_delta" --transformation=replace-function-def-with-decl --counter=1 --to-counter=39 /net/pc163.smi.local/local_disk/mvillmow/sw/tools/pviz/src/control_flow_analysis.cpp
sh: line 1: 14723 Segmentation fault      "/tools/local/creduce-2.4.0/libexec/clang_delta" --transformation=replace-function-def-with-decl --counter=1 --to-counter=39 /net/pc163.smi.local/local_disk/mvillmow/sw/tools/pviz/src/control_flow_analysis.cpp > /tmp/_AyfXntWUI


=======================================

OOPS: clang_delta crashed; please consider mailing
clang_delta_crash_tmp__AyfXntWUI.cpp
to creduce-bugs@flux.utah.edu and we will try to fix the bug
please also let us know what version of C-Reduce you are using

=======================================

reduce_crash.sh
#!/bin/bash
TESTCASE=${1:-control_flow_analysis.cpp}
/tools/local/clang+llvm-3.8.0-x86_64/bin/clang-tidy -checks=cert-* --extra-arg="-std=gnu++14" $TESTCASE -- -I/tools/local/gcc-4.9.2/include/c++/4.9.2 -I/net/pc163.smi.local/local_disk/mvillmow/sw/tools/pviz/src -I/net/pc163.smi.local/local_disk/mvillmow/sw/tools/pviz/src/libgexf -I/net/pc163.smi.local/local_disk/mvillmow/pviz45d/install/fir/4.5/current/debug_smiv45/x86_64/include -I/sw/components/codec/4.5/r70677/debug_smiv45/x86_64/include -I/sw/components/bt_gen/4.5/r70677/debug_smiv45/sm/include -I/arch/components/psim/4.5/r21531/release_smiv45/x86_64/include -I/usr/include/libxml2 -mbmi -msse2 -Wall -Wextra -m64 -DTOOL_MODE=0 -DARCH_VERSION=ARCH_VERSION_4_5_0 -DSMI_V45 -fopenmp -D__STDC_FORMAT_MACROS -std=gnu++14 2>&1 | grep Use
if ! test $? = 0; then
   exit 1
fi
exit 0

File is attached.

Hope this helps!
Micah Villmow
// -----------------------------------------------------------------------
//
//   Copyright (c) 2013 Softmachines Inc. - All Rights Reserved
//
//   This source module contains confidential and proprietary information
//   of Softmachines Inc.  It is not to be disclosed or used except
//   in accordance with applicable agreements.  This copyright notice does
//   not evidence any actual or intended publication of such source code.
//
// -----------------------------------------------------------------------
#include <ctype.h>
#include <err.h>
#include <cstring>
#include <cassert>
#include "disasm_log.h"
#include <vector>
#include <stdint.h>
#include <unordered_map>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <fstream>
#include <algorithm>
#include <map>
#include <list>
#include <set>
#include <functional>
#include <algorithm>
#include "path_parser.h"
#include "disasm_block.h"

options o;
char *fname;

static void buildGraphFromPsimLog();
static bool convertBlocksToVector();
static void computeDominators();
static void printBlocks();
static void detectLoopsOrGraph();
static void printAllPathPossibilities(Block *n1, Block *n2);
static void printOnlyPathPossibilities();
static void printOnlySuccessors();
static void printOnlyPredecessors();
static void printDominators();
static void searchDeadCode();
static void searchBranchAssert();
static void detectBranchCorrelations();
static void detectBranchPredicationOpportunities();
static void statisticAboutLoopTripCounts();

Block *entry;
std::map<uint32_t, std::map<uint32_t, Block* > > *gva_nva_allblocks;
std::vector<Block*> *allblocks;
std::map<uint32_t, std::vector<uint32_t> > *dominators;
std::list<Block*> *temporalblocks;
static bool dominatorscomputed;
uint32_t stats_blocks_created;
static unsigned lastCommitCC = 0;

enum flags {
    FLAG_MP = 1 << 0,
    FLAG_BT = 1 << 1,
    FLAG_BN = 1 << 2,
};

enum filter_state_e {
    NO_FILTER,
    NO_APP_SEEN,
    NEXT_BRANCH
};

static bool
parseArgs(int argc, char* argv[])
{
    memset(&o, 0, sizeof(o));
    fname = argv[argc-1];

    for (int x = 1; x < argc; ++x)
    {
        if (argv[x][0] == '-') {
            switch (argv[x][1]) {
                case 'h': o.help = true; break;
                case 'n': o.withNVA = true; break;
                case 'e': o.printPathes = true; break;
                case 'a': o.printPathesOnly = true; break;
                case 'S': o.printSuccOnly = true; break;
                case 'P': o.printPredOnly = true; break;
                case 'g': o.genGraph = true; break;
                case 'G': o.genGraph = true; o.genGraphHot = true; break;
                case 'i': o.branchHintInd = true; o.withNVA = true; break;
                case 'c': o.branchHintCond = true; o.withNVA = true; break;
                case 'p': o.branchPredication = true; o.withNVA = true; break;
                case 's': o.clusterGraph = true; break;
                case 'D': o.printDominators = true; break;
                case 'd': o.searchDeadCode = true; o.withNVA = true; break;
                case 't': o.searchBranchAssert = true; o.withNVA = true; break;
                case 'B': o.printBlocks = true; o.withNVA = true; break;
                case 'l': o.loopStatistics = true; o.withNVA = true; break;
                case 'm': o.limitMemory = true; break;
                case '-':
                    {
                        if (!strncmp(argv[x] + 2, "start-address", 13)) {
                            ++x;
                            ASSERT(x < argc);
                            std::stringstream str;
                            str << std::hex << argv[x];
                            str >> o.startAddress;
                        } else if (!strncmp(argv[x] + 2, "end-address", 11)) {
                            ++x;
                            ASSERT(x < argc);
                            std::stringstream str;
                            str << std::hex << argv[x];
                            str >> o.endAddress;
                        } else if (!strncmp(argv[x] + 2, "entry-address", 13)) {
                            ++x;
                            ASSERT(x < argc);
                            std::stringstream str;
                            str << std::hex << argv[x];
                            str >> o.entryAddress;
                        } else if (!strncmp(argv[x] + 2, "bh-mp-thres", 11)) {
                            ++x;
                            ASSERT(x < argc);
                            std::stringstream str;
                            str << std::dec << argv[x];
                            str >> o.bhMPThreshold;
                        } else if (!strncmp(argv[x] + 2, "bh-pc-thres", 11)) {
                            ++x;
                            ASSERT(x < argc);
                            std::stringstream str;
                            str << std::dec << argv[x];
                            str >> o.bhPathConfThreshold;
                        } else if (!strncmp(argv[x] + 2, "bh-tp-thres", 11)) {
                            ++x;
                            ASSERT(x < argc);
                            std::stringstream str;
                            str << std::dec << argv[x];
                            str >> o.bhTotalPathThreshold;
                        } else if (!strncmp(argv[x] + 2, "bh-ic-thres", 11)) {
                            ++x;
                            ASSERT(x < argc);
                            std::stringstream str;
                            str << std::dec << argv[x];
                            str >> o.bhICThreshold;
                        } else if (!strncmp(argv[x] + 2, "skip", 4)) {
                            ++x;
                            ASSERT(x < argc);
                            std::stringstream str;
                            str << std::dec << argv[x];
                            str >> o.skipDisasm;
                        } else {
                            return true;
                        }
                    }
                    break;
                default: return true;
            }
        } else {
            if (x != (argc - 1)) return true;
        }
    }
    if (o.printPathesOnly) {
        if (!(o.startAddress && o.endAddress) || o.printSuccOnly
            || o.printPredOnly)
            return true;
    }
    if (o.printSuccOnly || o.printPredOnly || o.printDominators) {
        if (!o.startAddress)
            return true;
    }
    if (o.clusterGraph) {
        if (!(o.genGraph && o.withNVA))
            return true;
    }

    return o.help;
}

void
init()
{
    dominatorscomputed = false;
    entry = 0;
    gva_nva_allblocks = new std::map<uint32_t, std::map<uint32_t, Block*> >;
    dominators = new std::map<uint32_t, std::vector<uint32_t> >;
    temporalblocks = new std::list<Block*>;
}

int
main(int argc, char *argv[]) {
    memset(&o, 0, sizeof(o));

    if (parseArgs(argc, argv)) {
        std::cerr << "Usage: " << argv[0] << " FILE\n";
        std::cerr << "-h               - Display help menu.\n";
        std::cerr << "-n               - With respect to NVA.\n";
        std::cerr << "-e               - Print all possible pathes for each loop.\n";
        std::cerr << "-f               - Filter only NVA addresses 0xa1000000 <= NVA <= 0xa4000000.\n";
        std::cerr << "-a               - Only print all possible pathes for start GVA - end GVA. Needs --start-address and --end-address\n";
        std::cerr << "-S               - Only print all possible successors for NVA. Needs --start-address\n";
        std::cerr << "-P               - Only print all possible predecessors for NVA. Needs --start-address\n";
        std::cerr << "-g               - Generate dotty graph, combinable with other options\n";
        std::cerr << "-G               - Generate dotty graph, but only hotpath, combinable with other options (also see --bh-tp-thres)\n";
        std::cerr << "-s               - Cluster nodes according to GVA. Needs -g -n\n";
        std::cerr << "-p               - Look for predication opportunities (also see --bh-mp-thres)\n";
        std::cerr << "-i               - Look for indirect branch hint opportunities (also see --bh-mp-thres, --bh-pc-thres and --bh-tp-thres)\n";
        std::cerr << "-c               - Look for conditioal branch hint opportunities (also see --bh-mp-thres, --bh-pc-thres and --bh-tp-thres)\n";
        std::cerr << "-D               - Print dominators of --start-address\n";
        std::cerr << "-d               - Search for dead code in dynamic trace\n";
        std::cerr << "-t               - Search for branch-as-assert opportunities (also see --bh-tp-thres)\n";
        std::cerr << "-B               - Print basic blocks with instructions\n";
        std::cerr << "-l               - Print loop statistics\n";
        std::cerr << "-m               - Limit to 32768 blocks (~4GB of memory)\n";
        std::cerr << "--bh-mp-thres    - Mispredict threshold in % for finding branch hint opportunities\n";
        std::cerr << "--bh-pc-thres    - Indirect branch confidence threshold in % for finding branch hint opportunities\n";
        std::cerr << "--bh-tp-thres    - Total path count threshold for finding branch hint opportunities\n";
        std::cerr << "--start-address  - For print pathes only: start address (GVA or NVA).\n";
        std::cerr << "--end-address    - For print pathes only: end address (GVA or NVA).\n";
        return 1;
    }

    init();
    buildGraphFromPsimLog();
    if (!convertBlocksToVector()) {
        std::cout << "Too many blocks, aborting due to memory usage"
                  << std::endl;
        return 0;
    }

    if (o.printPathesOnly) {
        printOnlyPathPossibilities();
    } else if (o.printSuccOnly) {
        printOnlySuccessors();
    } else if (o.printPredOnly) {
        printOnlyPredecessors();
    }  else if (o.printDominators) {
        computeDominators();
        dominatorscomputed = true;
        printDominators();
    } else if (o.branchHintInd || o.branchHintCond) {
        detectBranchCorrelations();
    } else if (o.branchPredication) {
        detectBranchPredicationOpportunities();
    } else if (o.loopStatistics) {
        computeDominators();
        dominatorscomputed = true;
        statisticAboutLoopTripCounts();
    } else if (o.searchDeadCode) {
        searchDeadCode();
    } else if (o.searchBranchAssert) {
        computeDominators();
        searchBranchAssert();
    }
    if (o.genGraph) {
        computeDominators();
        detectLoopsOrGraph();
    }

    if (o.printBlocks) {
        printBlocks();
    }

    std::cout.flush();
    return 0;
}

static bool
isDominator(Block *n1, Block *n2) {
    // does n2 dominate n1?
    auto all_dom_it = dominators->find(n1->getDomId());
    if (all_dom_it == dominators->end())
        return false;
    for (auto dom_it : all_dom_it->second) {
        ASSERT(allblocks->at(dom_it)->getDomId() == dom_it);
        if (allblocks->at(dom_it) == n2)
            return true;
    }
    return false;
}

static void
printAllPathPossibilitiesAux(std::vector<Block*> *currentpath, Block *tail)
{
    if (currentpath->back() == tail) {
        Block *prev = 0;
        for (auto it : *currentpath) {
            if (prev) {
                std::cout << "(" << std::dec
                          << prev->getFreq(it)
                          << ") ";
            }
            std::cout << std::hex << "0x" << it->getGva() << " (" << "0x" << it->getNva() << ")";
            if (!(it == currentpath->back()))
                std::cout << " -> ";
            prev = it;
        }
        std::cout << std::endl;
        return;
    }
    Block *last = currentpath->back();
    for (auto succ_it : *(last->getSucc())) {
        if (succ_it->isVisited())
            continue;
        currentpath->push_back(succ_it);
        currentpath->back()->setVisited(true);
        printAllPathPossibilitiesAux(currentpath, tail);
        currentpath->pop_back();
    }
}

static void
printAllPathPossibilities(Block *n1, Block *n2)
{
    std::vector<Block*> currentpath;
    currentpath.push_back(n1);
    currentpath.back()->setVisited(true);
    std::cout << "Possible pathes:" << std::endl;
    for (auto succ_it : *(n1->getSucc())) {
        currentpath.push_back(succ_it);
        currentpath.back()->setVisited(true);
        printAllPathPossibilitiesAux(&currentpath, n2);
        currentpath.pop_back();
    }
}

static void
printOnlyPredecessors()
{
    for (auto all_nva : *allblocks) {
        if (all_nva->getNva() != o.startAddress)
            continue;

        Block *n1 = all_nva;

        if (!n1) {
            std::cerr << "Start address not found" << std::endl;
            return;
        }

        std::cout << "Predecessors " << std::hex << "0x"
                  << n1->getGva() << " (" << "0x" << n1->getNva()
                  << "):" << std::endl;
        for (auto pred_it : *(n1->getPred())) {
            std::cout << std::hex << "0x" << pred_it->getGva()
                      << " (" << "0x" << pred_it->getNva() << ")"
                      << " (" << std::dec << n1->getFreq(pred_it)
                      << ")";
            if (pred_it != n1->getPred()->back())
                std::cout << ", ";
        }
        std::cout << std::endl;
    }
}

static void
searchBranchAssert()
{
    ASSERT(o.withNVA);
    unsigned int allblocks_size = allblocks->size();

    std::cout << "Benchmark\tBranch block address\t\tBranch address\t\t"
              << "Tail block address\t\tHead block address\t\t"
              << "Backedge frequency" << std::endl;
    std::cout << "\tNVA\tGVA\tNVA\tGVA\tNVA\tGVA\t" << std::endl;

#if WITH_OPENMP
#pragma omp parallel for
#endif
    for (unsigned int n = 0; n < allblocks_size; n++) {
        Block* cur = allblocks->at(n);

        if (cur->getSuccCount() != 1)
            continue;
        if (!cur->getLastInsn()->isBranch())
            continue;

        ASSERT(cur->getSucc()->at(0));
        Instruction *branch = cur->getLastInsn();
        ASSERT(branch->isBranch());

        bool branchtargetinloop = false;
        bool hitentry = false;
        Block *tail = cur;

        while (tail) {
            tail = tail->getSucc()->at(0);
            if (branch->isBranchTargetAddrValid()
                && (branch->getBranchTargetAddr() >= tail->getNva())
                && (branch->getBranchTargetAddr() <= tail->getNvaEnd())) {
                // TODO: this may not hold, target may still be in loop,
                // because branch target might be padded with nop's,
                // but PSIM won't show this.
                branchtargetinloop = true;
            }
            if (tail == entry) {
                hitentry = true;
                break;
            }
            if (tail->getSuccCount() != 1)
                break;
        }
        if (hitentry)
            continue;

        // tail now has more than 1 successor
        bool isloop = false;
        Block *head = nullptr;

        for (auto succ : *(tail->getSucc())) {
            if (isDominator(tail, succ)) {
                head = succ;
                isloop = true;
                break;
            }
        }
        if (!isloop)
            continue;
        ASSERT(head);
        if (!isDominator(cur, head))
            continue;
        if (!isDominator(tail, cur))
            continue;
        if (cur->getFreq(cur->getSucc()->at(0)) < o.bhTotalPathThreshold)
            continue;

#if WITH_OPENMP
        #pragma omp critical
#endif
        {
            std::cout << fname << "\t0x" << std::hex << cur->getNva()
                      << "\t0x" << cur->getGva() << "\t0x" << cur->getNvaEnd()
                      << "\t0x" << cur->getGvaEnd() << "\t0x" << tail->getNva()
                      << "\t0x" << tail->getGva() << "\t0x" << head->getNva()
                      << "\t0x" << head->getGva() << "\t" << std::dec
                      << tail->getFreq(head);

            if (!branch->isBranchTargetAddrValid()) {
                if (branch->isUncondBranch())
                    std::cout << "\t<Unconditional branch>\t";
                else
                    std::cout << "\t<Unknown target>\t";
            }
            else if (branchtargetinloop)
                std::cout << "\t<Stays in loop>\t";
            else
                std::cout << "\t<Leaves loop>\t";
            std::cout << branch->getIasm() << std::endl;
        }
    }
}

static void
searchDeadCode()
{
    ASSERT(temporalblocks->size() != 0);
    unsigned long long currentLiveUnused = 0;
    unsigned currentLiveGFRUnused = 0;
    for (auto curblock : *temporalblocks) {
        for (auto curinstr : *(curblock->getInsts())) {
            if (curinstr->isCommit())
                continue;

            // Mask out used inputs
            currentLiveUnused = currentLiveUnused & ~curinstr->getRegIns();
            currentLiveGFRUnused = currentLiveGFRUnused & ~curinstr->getRegInsGFR();

            if (!curinstr->isSuba32r29()) {
                if (currentLiveUnused & curinstr->getRegOuts()) {
                    std::cout << "Instruction " << std::hex << "0x" << curinstr->getNva()
                              << ": " << curinstr->getIasm() << " (Dests)"
                              << std::endl;
                } else if (currentLiveGFRUnused & curinstr->getRegOutsGFR()) {
                    std::cout << "Instruction " << std::hex << "0x" << curinstr->getNva()
                              << ": " << curinstr->getIasm() << " (GFR dests)"
                              << std::endl;
                }
            }

            if (curinstr->isSuba32r29())
                continue;

            currentLiveUnused |= curinstr->getRegOuts();
            currentLiveGFRUnused |= curinstr->getRegOutsGFR();
        }
    }
}

static void
printDominators()
{
    for (auto all_nva : *allblocks) {
        if (all_nva->getNva() != o.startAddress)
            continue;

        Block *n1 = all_nva;

        if (!n1) {
            std::cerr << "Start address not found" << std::endl;
            return;
        }

        std::cout << std::hex << "Dominators of 0x"
                  << (o.withNVA ? n1->getNva() : n1->getGva())
                  << std::endl;

        for (auto dom_it = (*dominators)[n1->getDomId()].begin();
             dom_it != (*dominators)[n1->getDomId()].end(); ++dom_it) {
            ASSERT(allblocks->at(*dom_it)->getDomId() == *dom_it);
            std::cout << std::hex << (o.withNVA ? allblocks->at(*dom_it)->getNva() :
                                      allblocks->at(*dom_it)->getGva());
            if (dom_it != (*dominators)[n1->getDomId()].end())
                std::cout << ", ";
        }
        std::cout << std::endl;
    }
}

static void
printOnlySuccessors()
{
    for (auto all_nva : *allblocks) {
        if (all_nva->getNva() != o.startAddress)
            continue;

        Block *n1 = all_nva;

        if (!n1) {
            std::cerr << "Start address not found" << std::endl;
            return;
        }

        std::cout << "Successors " << std::hex << "0x" << n1->getGva()
                  << " (" << "0x" << n1->getNva() << "):" << std::endl;
        for (auto succ_it : *(n1->getSucc())) {
            std::cout << std::hex << "0x" << succ_it->getGva()
                      << " (" << "0x" << succ_it->getNva() << ")"
                      << " (" << std::dec
                      << n1->getFreq(succ_it)
                      << ")";
            if (succ_it != n1->getSucc()->back())
                std::cout << ", ";
        }
        std::cout << std::endl;
    }
}

static void
printOnlyPathPossibilities()
{
    for (auto all_nva1 : *allblocks) {
        if (all_nva1->getNva() != o.startAddress)
            continue;

        Block *n1 = all_nva1;

        for (auto all_nva2 : *allblocks) {
            if (all_nva2->getNva() != o.endAddress)
                continue;

            Block *n2 = all_nva2;

            if (!n1) {
                std::cerr << "Start GVA not found" << std::endl;
                return;
            } else if (!n2) {
                std::cerr << "End GVA not found" << std::endl;
                return;
            }

            printAllPathPossibilities(n1, n2);
        }
    }
}

static void
detectLoopsOrGraph()
{
    if (o.genGraph) {
        std::cout << "digraph {" << std::endl
                  << "\tgraph [size=\"7.75,10.25\"]" << std::endl
                  << "\tnode [nodesep=0.75, ranksep=0.75]" << std::endl;

        for (auto all_nva : *allblocks) {
            all_nva->setVisited(false);
            if (all_nva == entry) {
                std::cout << std::hex << "\t\"0x"
                          << (o.withNVA ? all_nva->getNva() :
                              all_nva->getGva())
                          << "\" [shape=box, color=blue, style=bold]"
                          << std::endl;
            }
        }
    }
    uint32_t currentGVA = 0;
    bool nodesLeft      = false;
    uint32_t cluster    = 0;
    do {
        nodesLeft  = false;
        bool first = true;
        for (auto all_nva : *allblocks) {
            Block *curblock = all_nva;
            if (o.clusterGraph && o.withNVA && o.genGraph) {
                if (first) {
                    if (curblock->isVisited() == false) {
                        currentGVA = curblock->getGva();
                        std::cout << "\tsubgraph cluster_" << cluster++
                                  << " {" << std::endl
                                  << "\t\tlabel=\"0x" << std::hex
                                  << currentGVA << "\"" << std::endl;
                        first = false;
                    }
                } else {
                    if (curblock->getGva() != currentGVA) {
                        if (curblock->isVisited() == false)
                            nodesLeft = true;
                        continue;
                    }
                }
                if (curblock->isVisited())
                    continue;
                curblock->setVisited(true);
            }
            Block *maxsuccblock = 0;
            uint32_t  maxfreq = 0;
            if (curblock->getSuccCount() > 1) {
                for (auto succ : *curblock->getSucc()) {
                    uint32_t freq = curblock->getFreq(succ);
                    if (freq > maxfreq) {
                        maxsuccblock = succ;
                        maxfreq = freq;
                    }
                }
            }
            for (auto succ : *curblock->getSucc()) {
                if (o.genGraphHot) {
                    if (curblock->getFreq(succ) < o.bhTotalPathThreshold) {
                        continue;
                    }
                    if (curblock->getSuccCount() > 1) {
                        if (succ != maxsuccblock) {
                            continue;
                        }
                    }
                }
                if (o.genGraph) {
                    std::cout << "\t\t ";
                    if (curblock->isPseudo()) {
                        std::cout << "\"(pseudo) " << std::dec << curblock->getPseudoId();
                    } else {
                        std::cout << "\"0x" << std::hex << (o.withNVA ? curblock->getNva()
                                                            : curblock->getGva())
                                  << "/" << (o.withNVA ? curblock->getGva() : 0);
                    }
                    std::cout << "\"->";
                    if (succ->isPseudo()) {
                        std::cout << "\"(pseudo) " << std::dec << succ->getPseudoId();
                    } else {
                        std::cout << "\"0x" << std::hex << (o.withNVA ? succ->getNva()
                                                            : succ->getGva())
                                  << "/" << (o.withNVA ? succ->getGva() : 0);
                    }
                    std::cout  << "\" [";
                }
                if (isDominator(curblock, succ)) {
                    if (o.genGraph) {
                        std::cout << "color=red, style=dotted, ";
                    } else {
                        // successor is dominator of current node
                        std::cout << "Loop found (real backedge): "
                                  << std::hex << "\t0x" << curblock->getGva()
                                  << "\t->\t0x" << succ->getGva() << std::dec
                                  << "\tTotal trip count\t" << curblock->getFreq(succ)
                                  << std::endl;
                    }
                    if (!o.genGraph) {
                        if (o.printPathes)
                            printAllPathPossibilities(succ, curblock);
                    }
                } else if (succ == maxsuccblock) {
                    if (o.genGraph) {
                        std::cout << "color=green, ";
                    }
                }
                if (o.genGraph) {
                    std::cout << std::dec << "label=\"  "
                              << curblock->getFreq(succ) << " (MP rate "
                              << std::setprecision(3)
                              << 100.f * (double)curblock->getMPRate(succ)/(double)curblock->getFreq(succ)
                              << "%)\"]" << std::endl;
                }
            }
        }
        if (!first)
            std::cout << "\t\tcolor=green4\n\t}" << std::endl;
    } while (nodesLeft);
    if (o.genGraph)
        std::cout << "}" << std::endl;
}

static std::vector<uint32_t>
dom_intersection(std::vector<uint32_t> &v1, std::vector<uint32_t> &v2)
{
    std::vector<uint32_t> v3;

    std::sort(v1.begin(), v1.end());
    std::sort(v2.begin(), v2.end());

    std::set_intersection(v1.begin(), v1.end(), v2.begin(),
                          v2.end(), back_inserter(v3));

    return v3;
}

static void
statisticAboutLoopTripCounts() {
    ASSERT(temporalblocks->size() != 0);

    std::cout << "Starting trip count search" << std::endl;
    std::cout.flush();

#if 0
    for (auto tempcur : *temporalblocks) {
        std::cout << "0x" << tempcur->getNva();
        if (tempcur->isPseudo()) 
            std::cout << " (pseudo)";
        std::cout << std::endl;
    }
#endif

    unsigned int allblocks_size = allblocks->size();

#if WITH_OPENMP
#pragma omp parallel for
#endif
    for (unsigned int n = 0; n < allblocks_size; n++) {
        Block* cur = allblocks->at(n);

        for (auto succ : *cur->getSucc()) {
            if (!isDominator(cur, succ))
                continue;
#if HISTOGRAM
            std::map<uint32_t, uint32_t> loopcounts;
#else
            std::vector<uint32_t> loopcounts;
#endif

#define COUNT_SIDE_EXIT_LOOPS 0
            Block *tempprev = 0;
            uint32_t curcnt = 0;
            for (auto tempcur : *temporalblocks) {
                if (tempprev && (tempprev->getNva() == cur->getNva())
                    && (tempcur->getNva() == succ->getNva())) {
                    // Backedge taken
                    curcnt++;
                } else if (tempprev && (tempprev->getNva() == cur->getNva())
                           && (tempcur->getNva() != succ->getNva())) {
                    // Backedge not taken
#if HISTOGRAM
                    loopcounts[curcnt]++;
#else
                    loopcounts.push_back(curcnt);
#endif
                    curcnt = 0;
                } else if (tempprev && (tempcur->getNva() == succ->getNva())
                           && (tempprev->getNva() != cur->getNva())) {
                    // We are at header again but not from backedge,
                    // possibly side exit
                    if (COUNT_SIDE_EXIT_LOOPS && curcnt) {
#if HISTOGRAM
                        loopcounts[curcnt]++;
#else
                        loopcounts.push_back(curcnt);
#endif
                        curcnt = 0;
                    }
                }
                tempprev = tempcur;
            }
            if (COUNT_SIDE_EXIT_LOOPS && curcnt) {
#if HISTOGRAM
                loopcounts[curcnt]++;
#else
                loopcounts.push_back(curcnt);
#endif
                ASSERT(loopcounts.size() > 0);
            }
            // History!
            uint32_t zeros = 0;
            uint32_t totalcnt = 0;
            for (auto lc_it : loopcounts) {
#if HISTOGRAM
                if (lc_it.first == 0) {
                    zeros += lc_it.second;
                    totalcnt += lc_it.second;
                } else {
                    totalcnt += lc_it.second;
                }
#else
                if (lc_it == 0)
                    zeros++;
                totalcnt++;
#endif
            }
            bool mostly_zero = false;
            if ((double)zeros / (double)totalcnt > 0.5)
                mostly_zero = true;
            uint32_t freqofthispath = cur->getFreq(succ);
            uint32_t mpofthispath = cur->getMPRate(succ);
            double realmprate = 100.f * (double)mpofthispath / (double)freqofthispath;
#if WITH_OPENMP
#pragma omp critical
#endif
            {
                // successor is dominator of current node
                std::cout << "Loop found (real backedge): "
                          << std::hex << "\t0x" << cur->getNva()
                          << "\t->\t0x" << succ->getNva() << std::dec
                          << "\tTotal trip count\t" << freqofthispath
                          << "\tMP rate in %\t" << std::setprecision(3)
                          << realmprate;

                std::cout << "\tTrip counts:\t";
                if (mostly_zero) {
                    std::cout << "<mostly zero>";
                } else if (loopcounts.size() == 0) {
                    std::cout << "<side exit/don't know>";
                } else {
                    for (auto lc_it : loopcounts) {
#if HISTOGRAM
                        std::cout << lc_it.first << "/" << lc_it.second
#else
                        std::cout << lc_it
#endif
                                  << " ";
                    }
                }
            std::cout << std::endl;
            }
        }
    }
}

static int
countSubstring(const std::string& str, const std::string& sub)
{
    if (sub.length() == 0)
        return 0;
    int count = 0;
    for (size_t offset = str.find(sub); offset != std::string::npos;
	 offset = str.find(sub, offset + sub.length()))
        ++count;
    return count;
}

static void
detectBranchPredicationOpportunities() {
    unsigned int allblocks_size = allblocks->size();

    std::cout << "From\t\t" << "Hammock\t\t" << "To\t\t" << "MP rate\t"
              << "Total count\t" << "Instruction count\t"
              << "Improvement/cycles\t4 source instructions" << std::endl;
    std::cout << "GVA\tNVA\tGVA\tNVA\tGVA\tNVA" << std::endl;

#if WITH_OPENMP
    #pragma omp parallel for
#endif
    for (unsigned int n = 0; n < allblocks_size; n++) {
        Block* cur = allblocks->at(n);

        // Right now looking for hammocks only
        if (!cur->getLastInsn()->isCondBranch())
            continue;
        if (cur->getSuccCount() != 2)
            continue;
        // ASSERT(cur->getSuccCount() <= 2);
        for (uint32_t i = 0; i < cur->getSuccCount(); i++) {
            Block *succ = cur->getSucc()->at(i);
            if (succ->getSuccCount() != 1)
                continue;
            if (succ->getPredCount() != 1)
                continue;
            ASSERT(succ->getPred()->at(0) == cur);
            uint32_t j = i == 0 ? 1 : 0;
            if (succ->getSucc()->at(0)->getNva() != cur->getSucc()->at(j)->getNva())
                continue;
            uint32_t mprate = 100.f * (double)cur->getMPCount() /
                    (double)cur->getTotalCount();
            if (mprate < o.bhMPThreshold)
                continue;
            if (succ->getInstructionCount() > o.bhICThreshold)
                continue;
            ASSERT(succ->getTotalCount() >= cur->getFreq(succ));
            int32_t savings = cur->getTotalCount() * mprate * 12 / 100 -
                    succ->getTotalCount() * succ->getInstructionCount();
            if (savings <= 0)
                continue;
#if WITH_OPENMP
#pragma omp critical
#endif
            {
                std::cout << std::hex << "0x" << cur->getNvaEnd() << "\t0x"
                          << cur->getGvaEnd() << "\t0x" << succ->getNva() << "\t0x"
                          << succ->getGva() << "\t0x" << succ->getSucc()->at(0)->getNva()
                          << "\t0x" << succ->getSucc()->at(0)->getGva() << "\t"
                          << std::dec << mprate << "\t" << cur->getTotalCount()
                          << "\t" << succ->getInstructionCount() << "\t" << savings;
                for (auto inst : *(succ->getInsts())) {
                    std::string i_s(inst->getIasm());
                    if (countSubstring(i_s, "%r") <= 2)
                        continue;
                    if (countSubstring(i_s, "pred") != 0)
                        continue;
                    std::cout << "\t" << inst->getIasm();
                }
                std::cout << std::endl;
            }
        }
    }
}

static void
detectBranchCorrelations()
{
#if 1
    std::cout << "Benchmark\t" << "Branch\t\t\t" << "MP rate in %\t"
              << "Predecessor\t\t\t" << "Predecessor path count\t" << "Predecessor targets count\t"
              << "Joint point\t\t\t" << "To\t\t\t" << "Taken probability in %\t"
              << "Specific path count\t" << "Savings\t" << "Saving in % of total cycles\t" << "Maximum distance / blocks\t"
              << "Maximum distance / instructions" << std::endl;
    std::cout << "\tNVA\tGVA\tDisasm\t\t" << "NVA\tGVA\tDisasm\t\t\t"
              << "NVA\tGVA\tDisasm\t" << "NVA\tGVA\tDisasm"
              << std::endl;
#endif

    ASSERT(temporalblocks->size() != 0);
    double inpercentofTotalCycles_total= 0;

    int all_size = allblocks->size();
#if WITH_OPENMP
    #pragma omp parallel for
#endif
    for (int i = 0; i < all_size; i++) {
        Block* cur = allblocks->at(i);

        if (o.branchHintInd) {
            if (!cur->getLastInsn()->isGuestIndirectBranch())
                continue;
        } else {
            ASSERT(o.branchHintCond);
            if (!cur->getLastInsn()->isCondBranch())
                continue;
            // ASSERT(cur->getSuccCount() <= 2);
        }
        uint32_t mprate = 100.f * (double)cur->getMPCount() / (double)cur->getTotalCount();
        if (mprate < o.bhMPThreshold)
            continue;
        Block *prev = cur;
        int distance = 0;
        int noofblocks = 0;

        // find joint point
        while ((prev->getPredCount() == 1) &&
               !prev->isPseudo() &&
               (prev != entry)) {
            prev = prev->getPred()->front();
            distance += prev->getInstructionCount();
            noofblocks++;
        }
        // prev now has more than 1 predecessor
#if SIMPLIFY_WITH_XOR
        std::unordered_map<uint32_t, uint32_t> branchcorrelation;
#else
        std::unordered_map<uint64_t, uint32_t> branchcorrelation;
#endif
        uint32_t totalcount= 0;
        for (auto pred_it = prev->getPred()->begin();
             pred_it != prev->getPred()->end(); ++pred_it) {
            Block *pred = *pred_it;
            if (pred->isPseudo())
                continue;
            if (pred == entry)
                continue;
            bool armed = false;
            bool recordnext = false;
            for (auto all_temp_it = temporalblocks->begin();
                 all_temp_it !=temporalblocks->end(); ++all_temp_it) {
                // check the correlation of (*pred_it)->getNva()'s path,
                // when going to prev->getNva(), and cur->getNva()'s path.
                // prev might be cur.
                if ((*all_temp_it)->getNva() == pred->getNva()) {
                    armed = true;
                } else if (armed
                           && ((*all_temp_it)->getNva() == cur->getNva())) {
                    armed = false;
                    recordnext = true;
                } else if (recordnext) {
                    recordnext = false;
#if SIMPLIFY_WITH_XOR
                    uint32_t hcorr = pred->getNva() ^ (*all_temp_it)->getNva();
#else
                    uint64_t hcorr = ((uint64_t)pred->getNva() << 32) |
                            (uint64_t)(*all_temp_it)->getNva();
#endif
                    branchcorrelation[hcorr]++;
                    totalcount++;
                }
            }
        }
        for (auto pred_it = prev->getPred()->begin();
             pred_it != prev->getPred()->end(); ++pred_it) {
            Block *pred = *pred_it;
            if (pred->isPseudo())
                continue;
            uint32_t totalforindsuccessor = 0;
            for (auto succ_it = cur->getSucc()->begin();
                 succ_it != cur->getSucc()->end(); ++succ_it) {
                Block *succ = *succ_it;
                if (succ->isPseudo())
                    continue;
#if SIMPLIFY_WITH_XOR
                uint32_t hcorr = pred->getNva() ^ succ->getNva();
#else
                uint64_t hcorr = ((uint64_t)pred->getNva() << 32) | (uint64_t)succ->getNva();
#endif
                if (branchcorrelation.find(hcorr) == branchcorrelation.end())
                    continue;
                uint32_t bcn = branchcorrelation.find(hcorr)->second;
                if (bcn < o.bhTotalPathThreshold)
                    continue;
                totalforindsuccessor += bcn;
            }
            for (auto succ_it = cur->getSucc()->begin();
                 succ_it != cur->getSucc()->end(); ++succ_it) {
                Block *succ = *succ_it;
                if (succ->isPseudo())
                    continue;
#if SIMPLIFY_WITH_XOR
                uint32_t hcorr = pred->getNva() ^ succ->getNva();
#else
                uint64_t hcorr = ((uint64_t)pred->getNva() << 32) | (uint64_t)succ->getNva();
#endif
                if (branchcorrelation.find(hcorr) == branchcorrelation.end())
                    continue;

                uint32_t bcn = branchcorrelation.find(hcorr)->second;
                if (bcn < o.bhTotalPathThreshold)
                    continue;

                uint32_t percentofthispath = 100.f * (double)bcn / (double)totalforindsuccessor;
                if (percentofthispath < o.bhPathConfThreshold)
                    continue;

                uint32_t mpofthispath = cur->getMPRate(succ);
                uint32_t freqofpredpath = pred->getFreq(prev);
                uint32_t freqofcurpath = cur->getFreq(succ);
                ASSERT(mpofthispath <= freqofcurpath);

                uint32_t realmprate = 100.f * (double)mpofthispath / (double)freqofcurpath;

                uint32_t assumedfreqoftotalpath = 0.75 * std::min(freqofpredpath, freqofcurpath);
                int32_t savings = assumedfreqoftotalpath * ((((double)realmprate / 100.f) * 15.f)
                                                            - ((100.f - (double)percentofthispath) * 15.f / 100.f));
                ASSERT(lastCommitCC != 0);
                double inpercentofTotalCycles = 100 * (double)savings / (double)lastCommitCC;

                if (savings <= 0)
                    continue;

                uint32_t noofpredtargets = 0;
                for (auto succ_it = pred->getSucc()->begin();
                     succ_it != pred->getSucc()->end(); ++succ_it) {
                    Block *succ = *succ_it;
                    if (succ->isPseudo())
                        continue;
                    noofpredtargets++;
                }

#if WITH_OPENMP
                #pragma omp atomic
#endif
                inpercentofTotalCycles_total += inpercentofTotalCycles;

                int thisdistance = distance + pred->getInstructionCount();
#if WITH_OPENMP
                #pragma omp critical
#endif
                {
                    std::cout << fname << std::hex << "\t0x" << cur->getNvaEnd() << "\t0x" << cur->getGvaEnd() << "\t"
                              << std::dec << cur->getLastInsn()->getIasm() << "\t" << mprate << std::hex << "\t0x"
                              << pred->getNvaEnd() << "\t0x" << pred->getGvaEnd() << "\t" << pred->getLastInsn()->getIasm() << "\t"
                              << std::dec << freqofpredpath << "\t" << noofpredtargets << std::hex << "\t0x" << prev->getNvaEnd() << "\t0x"
                              << prev->getGvaEnd() << "\t" << prev->getLastInsn()->getIasm() << "\t0x" << succ->getNvaEnd() << "\t0x"
                              << succ->getGvaEnd() << "\t" << succ->getLastInsn()->getIasm() << "\t" << std::dec << percentofthispath
                              << "\t" << freqofcurpath << "\t" << savings << "\t" << inpercentofTotalCycles << "\t" << noofblocks + 1
                              << "\t" << thisdistance << std::endl;
                }
            }
        }
    }
    std::cout << fname << "\ttotal in %\t" << std::dec << inpercentofTotalCycles_total
              << " (last cycle " << lastCommitCC << ")" << std::endl;
}

static void
printBlocks()
{
    for (auto cur : *allblocks) {
        std::cout << std::hex << "Block\t0x" << cur->getNva() << "\t0x"
                  << cur->getGva();
        if (cur->isPseudo()) {
            std::cout << "\t<pseudo>" << std::endl << std::endl;
            continue;
        }
        std::cout << std::endl;
        for (auto inst : *(cur->getInsts())) {
            std::cout << std::hex << "0x" << inst->getNva() << "\t0x"
                      << inst->getGva() << "\t" << inst->getIasm()
                      << std::endl;
        }
        std::cout << std::endl;
    }
}

static void
computeDominators()
{
    if (dominatorscomputed)
        return;
#if 0
    // blah remove this
    std::cout << "Compute dominators" << std::endl;
    std::cout.flush();
#endif
    ASSERT(entry);

    if (!o.genGraph) {
        std::cout << std::hex << "Entry point address: 0x"
                  << (o.withNVA ? entry->getNva() : entry->getGva())
                  << std::endl;
    }

    std::vector<uint32_t> allblocks_tmp;
    for (unsigned int i = 0; i < allblocks->size(); ++i) {
        allblocks_tmp.push_back((uint32_t)i);
        allblocks->at(i)->setDomId((uint32_t)i);
    }
    for (unsigned int i = 0; i < allblocks->size(); ++i) {
        Block *cur = allblocks->at(i);
        if ((cur == entry)
            || cur->isPseudo()) {
            ASSERT(i <= 0xffffffff);
            (*dominators)[(uint32_t)i].push_back((uint32_t)i);
            continue;
        }
        (*dominators)[(uint32_t)i] = allblocks_tmp;
    }

    int change;
    unsigned int allblocks_size = allblocks->size();

#if WITH_OPENMP
    #pragma omp parallel
#endif
    do {
#if WITH_OPENMP
        #pragma omp barrier
        // Has to be a barrier so all threads are
        // done with reading 'change'
        #pragma omp single
#endif
        {
            change &= 0;
        }

#if WITH_OPENMP
        #pragma omp for
#endif
        for (unsigned int n = 0; n < allblocks_size; ++n) {
            Block *cur = allblocks->at(n);
            bool local_change = false;

            if ((cur == entry)
                || cur->isPseudo())
                continue;

            std::vector<uint32_t> new_dominators= allblocks_tmp;
            ASSERT(allblocks_tmp.size() == allblocks->size());
            unsigned int new_dominators_size = allblocks_tmp.size();

            auto current_dominators_it = dominators->find(n);
            ASSERT(current_dominators_it != dominators->end());
            unsigned int current_dominators_size = current_dominators_it->second.size();

#if ENABLE_DEBUG
            std::cout << "Old dominators of 0x" << std::hex
                      << (o.withNVA ? cur->getNva() : cur->getGva())
                      << ":" << std::endl;

            for (unsigned int i = 0; i < current_dominators_size; i++) {
                std::cout << "0x" << std::hex
                          << (o.withNVA ? allblocks->at(current_dominators_it->second.at(i))->getNva() :
                              allblocks->at(current_dominators_it->second.at(i))->getGva()) << ", ";
            }
#endif

            for (auto pred_it : *(cur->getPred())) {
                auto pred_dominators_it = dominators->find(pred_it->getDomId());
                ASSERT(pred_dominators_it != dominators->end());

                std::vector<uint32_t> pred_dominators = pred_dominators_it->second;
                new_dominators = dom_intersection(new_dominators, pred_dominators);

#if ENABLE_DEBUG
                new_dominators_size = new_dominators.size();
                std::cout << std::endl;
                std::cout << "New dominators of " << "0x"
                          << std::hex << (o.withNVA ? cur->getNva() :
                                          cur->getGva())
                          << " (predecessor " << "0x"
                          << (o.withNVA ? pred_it->getNva() : pred_it->getGva())
                          << ") :" << std::endl;
                for (unsigned int i = 0; i < new_dominators_size; i++) {
                    std::cout << "0x" << std::hex
                              << (o.withNVA ? allblocks->at(new_dominators.at(i))->getNva() :
                                  allblocks->at(new_dominators.at(i))->getGva())
                              << ", ";
                }
                std::cout << std::endl;
#endif
            }

            ASSERT(n <= 0xffffffff);
            new_dominators.push_back((uint32_t)n);
            new_dominators_size = new_dominators.size();

            if (current_dominators_size != new_dominators_size) {
#if WITH_OPENMP
                #pragma omp atomic
#endif
                change |= 1;
                local_change = true;
            } else {
                sort(current_dominators_it->second.begin(),
                     current_dominators_it->second.end());
                sort(new_dominators.begin(), new_dominators.end());
                for (unsigned int i = 0; i < current_dominators_size; i++) {
                    if (current_dominators_it->second.at(i) != new_dominators.at(i)) {
#if WITH_OPENMP
                        #pragma omp atomic
#endif
                        change |= 1;
                        local_change = true;
                        break;
                    }
                }
            }

            if (!local_change)
                continue;

#if ENABLE_DEBUG
            std::cout << "Old dominators of " << "0x"
                      << std::hex << (o.withNVA ? cur->getNva() :
                                      cur->getGva())
                      << ":" << std::endl;
            for (unsigned int i = 0; i < current_dominators_size; i++) {
                std::cout << "0x"
                          << std::hex
                          << (o.withNVA ? allblocks->at(current_dominators_it->second.at(i))->getNva()
                              : allblocks->at(current_dominators_it->second.at(i))->getGva())
                          << ", ";
            }
            std::cout << std::endl
                      << "New dominators of " << std::hex << "0x"
                      << (o.withNVA ? cur->getNva() :
                          cur->getGva())
                      << ":" << std::endl;
            for (unsigned int i = 0; i < new_dominators_size; i++) {
                std::cout << "0x"
                          << std::hex
                          << (o.withNVA ? allblocks->at(new_dominators.at(i))->getNva() :
                              allblocks->at(new_dominators.at(i))->getGva())
                          << ", ";
            }

            std::cout << std::endl;
#endif
            ASSERT(n <= 0xffffffff);
            ASSERT(dominators->find((uint32_t)n) != dominators->end());
#if WITH_OPENMP
            #pragma omp critical
#endif
            {
                (*dominators)[(uint32_t)n] = new_dominators;
            }
        }
#if WITH_OPENMP
        #pragma omp barrier
        // Has to be a barrier so all threads are
        // done with potential changes
#endif
        if (!change)
            break;
    } while (true);

    dominatorscomputed = true;
#if 0
    // blah remove this
    std::cout << "Done" << std::endl;
    std::cout.flush();
#endif
}

// 1) if current NVA is beginning of an existing block,
//    then terminate (fallthrough, e.g. hammock)
// 2) if current NVA is within an existing block, split it!

static Block*
createNewBlock(uint32_t nva, uint32_t gva, bool pseudo) {
    gva = pseudo ? Block::pseudoid + 1 : gva;
    Block *b = new Block(nva, gva, pseudo);
#if SANITY_CHECKS
    if (gva_nva_allblocks->find(gva) != gva_nva_allblocks->end()) {
        ASSERT((*gva_nva_allblocks)[gva].find(nva)
               == (*gva_nva_allblocks)[gva].end());
    }
#endif
    if (o.withNVA)  {
        (*gva_nva_allblocks)[gva][nva] = b;
    } else {
        (*gva_nva_allblocks)[gva][0] = b;
    }
    return b;
}

static Block*
getNewBlock(disasmLogEntry_t *entry, uint32_t psimlineno) {
    uint32_t nva = entry->nva;
    uint32_t gva = entry->gva;
    if (o.withNVA) {
        auto gva_it = gva_nva_allblocks->find(gva);
        if (gva_it == gva_nva_allblocks->end()) {
            Block *b = createNewBlock(nva, gva, false);
            b->addInstruction(new Instruction(entry, psimlineno));
            return b;
        } else {
            auto nva_it = gva_it->second.find(nva);
            if (nva_it == gva_it->second.end()) {
                Block *b = createNewBlock(nva, gva, false);
                b->addInstruction(new Instruction(entry, psimlineno));
                return b;
            } else {
                Block *n = nva_it->second;
                return n;
            }
        }
    } else {
        auto gva_it = gva_nva_allblocks->find(gva);
        if (gva_it == gva_nva_allblocks->end()) {
            Block *b = createNewBlock(nva, gva, false);
            b->addInstruction(new Instruction(entry, psimlineno));
            return b;
        } else {
            Block *n = gva_it->second[0];
            return n;
        }
    }
}

static void
adjustBlockCounts(Block *prev, Block *cur, bool lastwasmp) {
    ASSERT(cur);
    if (prev && ((!o.withNVA && (prev->getGva() != cur->getGva()))
                 || o.withNVA)) {
        prev->incrementFreq(cur);
        if (lastwasmp)
            prev->incrementMPRate(cur);
    }
    cur->incrementTotalCount();
}

static void
doSuccessorPredecessor(Block *prev, Block *cur) {
    if (prev && ((!o.withNVA && (prev->getGva() != cur->getGva()))
                 || o.withNVA)) {
        // Push predecessor to current if not present yet
        if (find(cur->getPred()->begin(), cur->getPred()->end(), prev)
            == cur->getPred()->end()) {
            cur->getPred()->push_back(prev);
        }
        // Push successor to previous if not present yet
        if (find(prev->getSucc()->begin(), prev->getSucc()->end(), cur)
            == prev->getSucc()->end()) {
            prev->getSucc()->push_back(cur);
        }
#if SANITY_CHECKS
        if (o.withNVA) {
            if (prev->getLastInsn()->isCondBranch()) {
                // ASSERT(prev->getSuccCount() <= 2);
            }
        }
#endif
        prev->setLeavesRegion(prev->getGva() != cur->getGva());
    }
}

static Block*
maybeSwitchBlock(Block *cur, uint32_t nva, uint32_t gva, bool lastwasmp) {
    ASSERT(o.withNVA);

    auto gva_it = gva_nva_allblocks->find(gva);

    if (gva_it == gva_nva_allblocks->end()) {
        return cur;
    } else {
        auto nva_it = gva_it->second.find(nva);
        if (nva_it == gva_it->second.end()) {
            return cur;
        } else {
            if (o.branchHintInd || o.branchHintCond
                || o.loopStatistics || o.searchDeadCode)
                temporalblocks->push_back(nva_it->second);
            doSuccessorPredecessor(cur, nva_it->second);
            adjustBlockCounts(cur, nva_it->second, lastwasmp);
#if SANITY_CHECKS
            uint32_t totalf = 0;
            for (auto pred : *(nva_it->second->getPred())) {
                uint32_t cf = pred->getFreq(nva_it->second);
                ASSERT(cf >= 1);
                totalf += cf;
            }
            totalf = totalf == 0 ? 1 : totalf;
            ASSERT(totalf <= nva_it->second->getTotalCount());
            uint32_t totalmp = 0;
            for (auto succ : *(nva_it->second->getSucc())) {
                uint32_t mp = nva_it->second->getMPRate(succ);
                totalmp += mp;
            }
            ASSERT(totalmp == nva_it->second->getMPCount());
#endif
            return nva_it->second;
        }
    }
    return cur;
}

static void
adjustTemporalBlocks(Block *nold, Block *n) {
    if (!(o.branchHintInd || o.branchHintCond
          || o.loopStatistics)) {
        return;
    }
    bool found = false;
    for (auto tmpb_it = temporalblocks->begin();
         tmpb_it != temporalblocks->end(); tmpb_it++) {
        if (*tmpb_it == nold) {
            ++tmpb_it;
            found = true;
            temporalblocks->insert(tmpb_it, n);
        }
    }
    ASSERT(found);
}

static bool
maybeSplitBlock(Block **cur, Block **prev, disasmLogEntry_t *entry,
                uint32_t psimlineno, bool /*lastwasmp*/) {
    uint32_t nva = entry->nva;
    uint32_t gva = entry->gva;

    if (!o.withNVA)
        return false;

    auto all_gva_it = gva_nva_allblocks->lower_bound(gva);
    if (all_gva_it != gva_nva_allblocks->begin())
        --all_gva_it;
    for (; all_gva_it != gva_nva_allblocks->end(); ++all_gva_it) {
        if (all_gva_it->first != gva)
            continue;
        auto all_nva_it = all_gva_it->second.lower_bound(gva);
        if (all_nva_it != all_gva_it->second.begin())
            --all_nva_it;
#if SANITY_CHECKS
        uint32_t last_nva = 0;
#endif
        for (; all_nva_it != all_gva_it->second.end(); ++all_nva_it) {
            Block *nold = all_nva_it->second;

#if SANITY_CHECKS
            ASSERT(last_nva < nold->getNva());
            last_nva = nold->getNva();
#endif
            if (nold->getNva() >= nva)
                break;
            if (!((nold->getNva() < nva)
                  && (nold->getNvaEnd() >= nva)))
                continue;
            // case 2) split
            ASSERT(nold->getInstructionCount() > 1);
            nold->setNvaEndToLast(nva);
            Block *b = createNewBlock(nva, gva, false);
            b->addInstruction(new Instruction(entry, psimlineno));
            for (auto it : *(nold->getSucc())) {
                doSuccessorPredecessor(b, it);
                b->setFreq(it, nold->getFreq(it));
                b->setMPRate(it, nold->getMPRate(it));
                it->removePred(nold);
            }
            b->setMPCount(nold->getMPCount());
            nold->setMPCount(0);
            nold->clearSucc();
            adjustTemporalBlocks(nold, b);
            doSuccessorPredecessor(nold, b);
            nold->setFreq(b, nold->getTotalCount());
            nold->setMPRate(b, 0);
            b->setTotalCount(nold->getTotalCount());
#if SANITY_CHECKS
            uint32_t totalf = 0;
            for (auto pred : *(b->getPred())) {
                uint32_t cf = pred->getFreq(b);
                ASSERT(cf >= 1);
                totalf += cf;
            }
            totalf = totalf == 0 ? 1 : totalf;
            ASSERT(totalf <= b->getTotalCount());
            uint32_t totalmp = 0;
            for (auto succ : *(b->getSucc())) {
                uint32_t mp = b->getMPRate(succ);
                totalmp += mp;
            }
            ASSERT(totalmp <= b->getMPCount());
#endif
            if (nold == *cur)
                *prev = b;
            *cur = b;
            return true;
        }
    }

    return false;
}

static void
parseBranchTarget(Block *cur, disasmLogEntry_t *entry) {
    std::string s(entry->assembly);
    size_t pos = s.find("brn");
    if (pos != std::string::npos) {
        char *p = entry->assembly + pos;
        while (!isspace(*p))
            ++p;
        while (isspace(*p))
            ++p;
        int32_t off;
        int r = sscanf(p, "%d", &off);
        if (r != 1)
            return;
#if SANITY_CHECKS
        if (cur->getLastInsn()->isBranchTargetAddrValid()) {
            ASSERT(cur->getLastInsn()->getBranchTargetAddr()
                   == cur->getLastInsn()->getNva() + off);
        }
#endif
        cur->getLastInsn()->setBranchTargetAddr(off);
    }
}

static void
parseBranchOpcode(Block *cur, disasmLogEntry_t *entry) {
    ASSERT(cur);
    ASSERT(entry->events.br);

    char *p = entry->assembly;
    char *psave = p;

    cur->getLastInsn()->setBranch(true);

    // bad, slow
    while (*p != '\0') {
        if (!strncmp(p, "g.brn.ind", 9)) {
            cur->getLastInsn()->setGuestIndirectBranch(true);
            if (o.searchBranchAssert)
                parseBranchTarget(cur, entry);
            return;
        }
        p++;
    }
    p = psave;
    while (*p != '\0') {
        if (!strncmp(p, "g.brn.rtn", 9)) {
            cur->getLastInsn()->setGuestBranchReturn(true);
            if (o.searchBranchAssert) {
                ASSERT(0); // TODO
                parseBranchTarget(cur, entry);
            }
            return;
        }
        p++;
    }
    p = psave;
    bool uncond = false;
    while (*p != '\0') {
        if (!strncmp(p, "%zero.EQ", 8)) {
            uncond = true;
            p++;
            continue;
        }
        if (!strncmp(p, " brn", 4)) {
            if (uncond)
                cur->getLastInsn()->setUncondBranch(true);
            else
                cur->getLastInsn()->setCondBranch(true);
            if (o.searchBranchAssert)
                parseBranchTarget(cur, entry);
            return;
        }
        if (!strncmp(p, " n.brn", 4)) {
            if (uncond)
                cur->getLastInsn()->setUncondBranch(true);
            else
                cur->getLastInsn()->setCondBranch(true);
            cur->getLastInsn()->setNativeBranch(true);
            if (o.searchBranchAssert)
                parseBranchTarget(cur, entry);
            return;
        }
        p++;
    }
}

static bool
convertBlocksToVector()
{
    allblocks = new std::vector<Block*>;
    FOR_EACH_GVA_NVA_ALLBLOCKS(all_gva_it, all_nva_it) {
        allblocks->push_back(all_nva_it.second);
        if (o.entryAddress
            && all_nva_it.second->getNva() == o.entryAddress) {
            entry = all_nva_it.second;
        }
    }}
    delete gva_nva_allblocks;
    if (o.limitMemory && (allblocks->size() > 131072))
        return false;
    return true;
}

static void
buildGraphFromPsimLog()
{
    uint32_t psimlineno = 0;
    uint32_t r;
    gzFile fpGz;
    disasmLogEntry_t disasmEntry;

    // Copy message to stdout if it is redirected to a file.
    if (!o.genGraph) {
        if ( !isatty(fileno(stdout)) )
            std::cout << std::endl << "Parsing psim disasm log \'"
                      << fname << "\'\n";
	    std::cout.flush();
    }

    fpGz = disasmLogOpen(fname);
    // Eat header line.
    disasmLogGetHeader(fpGz, &psimlineno);

    Block *cur = 0, *prev = 0;

    // Read past initial lines with alloc_cc==0 or global events.
    // Not sure why we are doing this but took it over from
    // other tools - hgreving
#if 0
    do {
	r = disasmLogGetEntry(fpGz, &disasmEntry, &psimlineno);
    } while ((r==DisasmLogCC0Entry) || (r==DisasmLogGlobalEntry));
#endif
    stats_blocks_created = 0;
    bool nextblock = true;
    bool lastwasmp = false;
    // If a global event or non-APP mode instruction has been seen,
    // fetching is turned off, we wait until the next 2 branches in
    // APP mode, then resume disasm.log fetch.
    filter_state_e filterins = NO_FILTER;
    do {
	r = disasmLogGetEntry(fpGz, &disasmEntry, &psimlineno);

        if (r == DisasmLogEOF)
	    break;

        if (o.skipDisasm) {
            if (psimlineno < o.skipDisasm)
                continue;
        }
        // mask out thumb bit of GVA
        disasmEntry.gva &= 0xfffffffe;

#if !ENABLE_STATS
	if ( (psimlineno % 100000) == 0 ) {
            if (!o.genGraph && !o.branchHintInd &&
                !o.branchHintCond && !o.branchPredication)
                std::cout << ".";
	    std::cout.flush();
	}
#endif
#if ENABLE_STATS
	if ( (psimlineno % 250000) == 0 ) {
            std::cout << std::dec << stats_blocks_created << "\t";
            stats_blocks_created = 0;
	    std::cout.flush();
	}
#endif
        if (r == DisasmLogGlobalEntry) {
	    // Simply skip global events for now.
	    continue;
        }

        // SW-18993
        if ( !strcmp(disasmEntry.assembly, "")) {
            continue;
        }
        lastCommitCC = disasmEntry.commitCC;
	if (r != DisasmLogNormalEntry)
	    ASSERT(0);
        if (!disasmEntry.isAPPComp) {
            nextblock = false;
            if (cur) {
                Block *b = createNewBlock(0, 0, true);
                b->addInstruction(new Instruction(&disasmEntry, psimlineno));
                if (o.branchHintInd || o.branchHintCond
                    || o.loopStatistics || o.searchDeadCode)
                    temporalblocks->push_back(b);
                doSuccessorPredecessor(prev, b);
                adjustBlockCounts(prev, b, lastwasmp);
                prev = b;
            }
            cur = 0;
            filterins = NO_APP_SEEN;
            lastwasmp = false;
            continue;
        }
        if (nextblock) {
            ASSERT(filterins == NO_FILTER);

            // We are here because the previous one was a branch.
            // We either need to split an existing block, or take
            // an existing one, or create a new one

            nextblock = false;

            bool splitted = maybeSplitBlock(&cur, &prev, &disasmEntry,
                                            psimlineno, lastwasmp);
            if (!splitted)
                cur = getNewBlock(&disasmEntry, psimlineno);

            if (o.branchHintInd || o.branchHintCond
                || o.loopStatistics || o.searchDeadCode)
                temporalblocks->push_back(cur);
            doSuccessorPredecessor(prev, cur);
            adjustBlockCounts(prev, cur, lastwasmp);

#if SANITY_CHECKS
            uint32_t totalf = 0;
            for (auto pred : *(cur->getPred())) {
                uint32_t cf = pred->getFreq(cur);
                ASSERT(cf >= 1);
                totalf += cf;
            }
            totalf = totalf == 0 ? 1 : totalf;
            ASSERT(totalf <= cur->getTotalCount());
            uint32_t totalmp = 0;
            for (auto succ : *(cur->getSucc())) {
                uint32_t mp = cur->getMPRate(succ);
                totalmp += mp;
            }
            ASSERT(totalmp == cur->getMPCount());
#endif

            if (!prev && !entry)
                entry = cur;
            // We link previous blocks later
            // optimistically, otherwise we get
            // weird control flow graphs, but it
            // is not 100% correct. This makes
            // control flow graph coherent.
            if (cur)
                prev = cur;
        } else {
            // This might be an existing block,
            // switch to it.
            ASSERT(lastwasmp == false);
            if (o.withNVA)
                cur = maybeSwitchBlock(cur, disasmEntry.nva, disasmEntry.gva, lastwasmp);
            // Same as above, coherent control flow graph.
            if (cur)
                prev = cur;
        }

        if ((filterins != NO_APP_SEEN) && disasmEntry.events.br) {
            ASSERT(!cur || (prev == cur));

            if (filterins == NEXT_BRANCH) {
                filterins = NO_FILTER;
                nextblock = true;
                continue;
            }
            if (!cur) {
                nextblock = true;
                continue;
            }

            ASSERT(filterins == NO_FILTER);
            nextblock = true;

            if (o.withNVA) {
                if (cur->getNvaEnd() < disasmEntry.nva)
                    cur->addInstruction(new Instruction(&disasmEntry, psimlineno));
            } else {
                cur->addInstruction(new Instruction(&disasmEntry, psimlineno));
            }

            if (o.branchHintInd || o.branchHintCond
                || o.branchPredication || o.searchBranchAssert)
                parseBranchOpcode(cur, &disasmEntry);
            if (disasmEntry.events.mp) {
                cur->incrementMPCount();
                lastwasmp = true;
            } else {
                lastwasmp = false;
            }

            continue;
        } else {
            lastwasmp = false;
        }
        if (filterins == NO_APP_SEEN) {
            filterins = NEXT_BRANCH;
            continue;
        }
        if (!cur || (filterins != NO_FILTER)) {
            continue;
        }
        ASSERT(filterins == NO_FILTER);
        ASSERT(!cur || (prev == cur));

        if (o.withNVA) {
            if (cur->getNvaEnd() < disasmEntry.nva)
                cur->addInstruction(new Instruction(&disasmEntry, psimlineno));
        } else {
            cur->addInstruction(new Instruction(&disasmEntry, psimlineno));
        }
    } while (true);
    std::cout << std::endl;
}


// this should reproduce the crash:
// /tools/local/creduce-2.4.0/libexec/clang_delta --transformation=replace-function-def-with-decl --counter=1 /ld2/mvillmow/pviz45d/clang_delta_crash_tmp__AyfXntWUI.cpp