From 46a5b0d044634977d3a187c202f281ac980e8726 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 20 Aug 2023 17:23:14 +0800 Subject: [PATCH] feat(transform): relayout 2d array --- CMakeLists.txt | 4 +- cmmc/IR/GlobalVariable.cpp | 1 + cmmc/IR/GlobalVariable.hpp | 7 +- cmmc/Target/RISCV/RISCVTarget.cpp | 12 +- cmmc/Transforms/Misc/Relayout.cpp | 284 ++ cmmc/Transforms/TransformPass.cpp | 2 + educg/cmmc_file_filter.py | 2 +- tests/SysY2022/functional/66_exgcd.riscv.s | 14 +- tests/SysY2022/functional/75_max_flow.riscv.s | 221 +- .../functional/93_nested_calls.riscv.s | 38 +- .../SysY2022/hidden_functional/09_BFS.riscv.s | 588 ++-- .../hidden_functional/19_search.riscv.s | 481 ++-- .../hidden_functional/19_search.sy.ir | 90 +- .../hidden_functional/20_sort.riscv.s | 476 +-- .../hidden_functional/21_union_find.riscv.s | 160 +- .../22_matrix_multiply.riscv.s | 423 ++- .../22_matrix_multiply.sy.ir | 196 +- .../hidden_functional/23_json.riscv.s | 451 ++- .../hidden_functional/28_side_effect2.riscv.s | 770 ++--- .../hidden_functional/39_fp_params.riscv.s | 756 ++--- tests/SysY2022/performance/fft0.riscv.s | 10 +- tests/SysY2022/performance/fft1.riscv.s | 10 +- tests/SysY2022/performance/fft2.riscv.s | 10 +- tests/SysY2022/performance/matmul1.riscv.s | 2539 +++++++++-------- tests/SysY2022/performance/matmul1.sy.ir | 1382 ++++----- tests/SysY2022/performance/matmul2.riscv.s | 2539 +++++++++-------- tests/SysY2022/performance/matmul2.sy.ir | 1382 ++++----- tests/SysY2022/performance/matmul3.riscv.s | 2539 +++++++++-------- tests/SysY2022/performance/matmul3.sy.ir | 1382 ++++----- .../SysY2022/performance/vector_mul1.riscv.s | 1010 +++---- .../SysY2022/performance/vector_mul2.riscv.s | 999 ++++--- .../SysY2022/performance/vector_mul3.riscv.s | 955 +++---- 32 files changed, 10358 insertions(+), 9375 deletions(-) create mode 100644 cmmc/Transforms/Misc/Relayout.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9aa46d62d..7ba12fd31 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -175,9 +175,9 @@ COMMAND ${CMAKE_COMMAND} -E rm -rf ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Conversion/ ${C COMMAND ${CMAKE_COMMAND} -E rm -rf ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Transforms/Verify.cpp COMMAND ${CMAKE_COMMAND} -E rm -rf ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Support/Deterministic.cpp COMMAND ${CMAKE_COMMAND} -E rm -rf ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Support/mimalloc -COMMAND ${CMAKE_COMMAND} -E rm -rf ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Runtime/ +COMMAND ${CMAKE_COMMAND} -E rename ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Runtime/cmmc_sysy_rt.cpp ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Runtime/cmmc_sysy_rt.hpp COMMAND ${CMAKE_COMMAND} -E rm -rf ${CMMC_EDUCG_SUBMIT_DIR}/MIPS ${CMMC_EDUCG_SUBMIT_DIR}/TAC -COMMAND ${CMAKE_COMMAND} -E rm -rf ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Target/LLVM ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Target/MIPS ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Target/Sim ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Target/TAC ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Target/Old ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Target/Template ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Target/Generic +COMMAND ${CMAKE_COMMAND} -E rm -rf ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Target/LLVM ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Target/MIPS ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Target/Sim ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Target/TAC ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Target/Old ${CMMC_EDUCG_SUBMIT_DIR}/cmmc/Target/Generic COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/educg/cmmc_header_fix.py ${CMMC_EDUCG_SUBMIT_DIR} COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/educg/cmmc_file_filter.py ${CMMC_EDUCG_SUBMIT_DIR} COMMAND cd ${CMMC_EDUCG_SUBMIT_DIR} diff --git a/cmmc/IR/GlobalVariable.cpp b/cmmc/IR/GlobalVariable.cpp index edd6f4371..0991af538 100644 --- a/cmmc/IR/GlobalVariable.cpp +++ b/cmmc/IR/GlobalVariable.cpp @@ -35,6 +35,7 @@ void GlobalVariable::dump(std::ostream& out, const HighlightSelector& selector) HANDLE_ATTR(ReadOnly); HANDLE_ATTR(Flexible); HANDLE_ATTR(InitOnce); + HANDLE_ATTR(Transposed); #undef HANDLE_ATTR out << '}'; diff --git a/cmmc/IR/GlobalVariable.hpp b/cmmc/IR/GlobalVariable.hpp index fa7aa10f6..21026fb2e 100644 --- a/cmmc/IR/GlobalVariable.hpp +++ b/cmmc/IR/GlobalVariable.hpp @@ -21,7 +21,12 @@ CMMC_NAMESPACE_BEGIN -enum class GlobalVariableAttribute { ReadOnly = 1 << 0, Flexible = 1 << 1, InitOnce = 1 << 2 }; +enum class GlobalVariableAttribute { + ReadOnly = 1 << 0, + Flexible = 1 << 1, + InitOnce = 1 << 2, + Transposed = 1 << 3, +}; class GlobalVariable final : public GlobalValue { ConstantValue* mStaticInitializedValue{ nullptr }; diff --git a/cmmc/Target/RISCV/RISCVTarget.cpp b/cmmc/Target/RISCV/RISCVTarget.cpp index c473d77c2..7f88d1f66 100644 --- a/cmmc/Target/RISCV/RISCVTarget.cpp +++ b/cmmc/Target/RISCV/RISCVTarget.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -257,7 +258,16 @@ class RISCVTarget final : public Target { // out << ".option arch rv64gc_zba_zbb\n"; out << R"(.attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zba1p0_zbb1p0")" << '\n'; } - if(runtime == RuntimeType::SysYRuntime) { + auto useSysYRuntime = [&] { + for(auto& global : module.globals()) { + const auto symbol = global->reloc->symbol(); + if(symbol == String::get("cmmcParallelFor") || symbol == String::get("cmmcCacheLookup") || + symbol == String::get("cmmcAddRec3SRem") || symbol == String::get("cmmcReduceAddI32")) + return true; + } + return false; + }; + if(runtime == RuntimeType::SysYRuntime && useSysYRuntime()) { out << builtinSysYRuntime; } diff --git a/cmmc/Transforms/Misc/Relayout.cpp b/cmmc/Transforms/Misc/Relayout.cpp new file mode 100644 index 000000000..3cffc5d38 --- /dev/null +++ b/cmmc/Transforms/Misc/Relayout.cpp @@ -0,0 +1,284 @@ +/* + SPDX-License-Identifier: Apache-2.0 + Copyright 2023 CMMC Authors + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +CMMC_NAMESPACE_BEGIN + +class Relayout final : public TransformPass { + // square only + static bool isArray2D(const Type* t) { + if(!t->isArray()) + return false; + const auto subArr = t->as()->getElementType(); + const auto dim1 = t->as()->getElementCount(); + if(!subArr->isArray()) + return false; + if(!subArr->as()->getElementType()->isPrimitive()) + return false; + const auto dim2 = subArr->as()->getElementCount(); + return dim1 == dim2; + } + + static bool findPhi(Value* val, PhiInst*& idx) { + if(val->is()) { + idx = val->as(); + return true; + } + intmax_t i1; + MatchContext matchCtx{ val }; + if(add(phi(idx), int_(i1))(matchCtx)) + return true; + if(mul(phi(idx), int_(i1))(matchCtx)) + return true; + if(add(mul(phi(idx), int_(i1)), int_(i1))(matchCtx)) + return true; + return false; + } + static void analysisAccessPattern(Value* ptr, PhiInst*& idx1, PhiInst*& idx2) { + if(!ptr->is()) + return; + auto gep = ptr->as(); + const auto base = gep->lastOperand(); + const auto lastIdx = gep->arguments().back(); + if(!findPhi(lastIdx, idx2 ? idx1 : idx2)) + return; + analysisAccessPattern(base, idx1, idx2); + } + + static void collectAccessPatterns(std::unordered_map>& access, Function* func, + AnalysisPassManager& analysis) { + auto& freq = analysis.get(*func); + if(!freq.isAvailable()) + return; + + auto& dom = analysis.get(*func); + auto& pointerBase = analysis.get(*func); + for(auto block : func->blocks()) { + const auto blockFreq = freq.query(block); + for(auto& inst : block->instructions()) { + if(inst.getInstID() == InstructionID::Load || inst.getInstID() == InstructionID::Store || + inst.getInstID() == InstructionID::AtomicAdd) { + const auto ptr = inst.getOperand(0); + const auto base = pointerBase.lookup(ptr); + if(!base || !base->isGlobal()) + continue; + + if(auto iter = access.find(base->as()); iter != access.end()) { + auto& [x, y] = iter->second; + PhiInst *idx1 = nullptr, *idx2 = nullptr; + // access A[idx1][idx2] + analysisAccessPattern(ptr, idx1, idx2); + // std::cerr << "analysis "; + // ptr->dumpAsOperand(std::cerr); + // std::cerr << " ["; + // if(idx1) + // idx1->dumpAsOperand(std::cerr); + // else + // std::cerr << "unknown"; + // std::cerr << "]["; + // if(idx2) + // idx2->dumpAsOperand(std::cerr); + // else + // std::cerr << "unknown"; + // std::cerr << "]\n"; + + if(idx1 && idx2) { + if(dom.dominate(idx2->getBlock(), idx1->getBlock())) + y += blockFreq; + else + x += blockFreq; + } + } + } + } + } + } + + static void trace(IRBuilder& builder, GetElementPtrInst* gep, Value*& idx1, Value*& idx2, int32_t dim) { + if(dim <= 0) + return; + auto addIdx = [&](int32_t d, Value* val) { + if(val->is() && val->as()->getSignExtended() == 0) + return; + auto& idx = d == 2 ? idx2 : idx1; + if(idx) + idx = builder.makeOp(InstructionID::Add, val, idx); + else + idx = val; + }; + + if(gep->operands().size() == 2) { + const auto idx = gep->getOperand(0); + const auto base = gep->getOperand(1); + addIdx(dim, idx); + if(base->is()) + trace(builder, base->as(), idx1, idx2, dim); + } else if(gep->operands().size() == 3) { + const auto offset = gep->getOperand(0); + const auto idx = gep->getOperand(1); + const auto base = gep->getOperand(2); + addIdx(dim, idx); + addIdx(dim - 1, offset); + if(base->is()) + trace(builder, base->as(), idx1, idx2, dim - 1); + } + } + + static void applyTranspose(Module& mod, AnalysisPassManager& analysis, GlobalVariable* var) { + for(auto global : mod.globals()) { + if(!global->isFunction()) + continue; + const auto func = global->as(); + if(func->blocks().empty()) + continue; + + auto& pointerBase = analysis.get(*func); + for(auto block : func->blocks()) { + for(auto& inst : block->instructions()) { + if(inst.getType()->isPointer() && inst.getInstID() != InstructionID::FunctionPtr) { + const auto base = pointerBase.lookup(&inst); + if(base == var && inst.getType()->as()->getPointee()->isPrimitive()) { + switch(inst.getInstID()) { + case InstructionID::GetElementPtr: { + IRBuilder builder{ analysis.module().getTarget() }; + builder.setInsertPoint(block, inst.asIterator()); + Value *idx1 = nullptr, *idx2 = nullptr; + trace(builder, inst.as(), idx1, idx2, 2); + const auto zero = ConstantInteger::get(idx1->getType(), 0); + Value* newPtr1 = builder.makeOp(var, std::vector{ zero, idx2 }); + Value* newPtr2 = + builder.makeOp(newPtr1, std::vector{ zero, idx1 }); + inst.replaceWith(newPtr2); + } break; + default: + break; + } + } + } + } + } + } + } + +public: + bool run(Module& mod, AnalysisPassManager& analysis) const override { + std::unordered_map> todo; + std::unordered_set unsupportedAddressing; + for(auto global : mod.globals()) { + if(!global->isFunction()) { + const auto var = global->as(); + if(var->attr().hasAttr(GlobalVariableAttribute::Flexible) && !var->initialValue()) { + if(var->attr().hasAttr(GlobalVariableAttribute::Transposed)) + continue; + const auto type = var->getType()->as()->getPointee(); + if(isArray2D(type)) + todo.emplace(var, std::pair{ 0, 0 }); + } + } else { + const auto func = global->as(); + if(func->blocks().empty()) + continue; + auto& pointerBase = analysis.get(*func); + for(auto block : func->blocks()) { + for(auto& inst : block->instructions()) { + if(inst.getType()->isPointer() && inst.getInstID() != InstructionID::FunctionPtr) { + const auto base = pointerBase.lookup(&inst); + if(!base || !(base->is() || base->isGlobal())) + return false; + if(inst.getInstID() == InstructionID::Phi || inst.getInstID() == InstructionID::Select) + unsupportedAddressing.insert(base); + } + } + } + } + } + + for(auto key : unsupportedAddressing) + if(key->isGlobal()) + todo.erase(key->as()); + + if(todo.empty()) + return false; + + for(auto global : mod.globals()) { + if(!global->isFunction()) + continue; + const auto func = global->as(); + if(func->blocks().empty()) + continue; + + collectAccessPatterns(todo, func, analysis); + } + bool modified = false; + + for(auto& [var, score] : todo) { + const auto [normal, transposed] = score; + // var->dumpAsOperand(std::cerr); + // std::cerr << " : " << normal << " " << transposed << "\n"; + if(normal * 4 + 1e-8 < transposed) { + var->attr().addAttr(GlobalVariableAttribute::Transposed); + applyTranspose(mod, analysis, var); + modified = true; + } + } + + return modified; + } + + [[nodiscard]] std::string_view name() const noexcept override { + return "Relayout"sv; + } +}; + +CMMC_TRANSFORM_PASS(Relayout); + +class RelayoutFuncLevel final : public TransformPass { +public: + bool run(Function&, AnalysisPassManager& analysis) const override { + auto& mod = analysis.module(); + Relayout relayout; + if(relayout.run(mod, analysis)) { + analysis.invalidateModule(); + return true; + } + return false; + } + + [[nodiscard]] std::string_view name() const noexcept override { + return "RelayoutFuncLevel"sv; + } +}; + +CMMC_TRANSFORM_PASS(RelayoutFuncLevel); + +CMMC_NAMESPACE_END diff --git a/cmmc/Transforms/TransformPass.cpp b/cmmc/Transforms/TransformPass.cpp index b26902f1a..adabc4063 100644 --- a/cmmc/Transforms/TransformPass.cpp +++ b/cmmc/Transforms/TransformPass.cpp @@ -289,6 +289,7 @@ std::shared_ptr> PassManager::get(OptimizationLevel "StoreOnlyGlobalEliminate", // "GlobalEliminate", // "DeadArgEliminate", // + "Relayout", // })) globalOpt->addPass(pass); @@ -413,6 +414,7 @@ std::shared_ptr> PassManager::get(OptimizationLevel "LoopEliminate", // clean up "FinalValueReplacement", // clean up "SCCEliminate", // clean up + "RelayoutFuncLevel", // clean up "LoopParallel", // "LoopUnroll", // "BlockMerge", // clean up diff --git a/educg/cmmc_file_filter.py b/educg/cmmc_file_filter.py index 628563d51..aa5093898 100644 --- a/educg/cmmc_file_filter.py +++ b/educg/cmmc_file_filter.py @@ -5,5 +5,5 @@ cmmc_root = sys.argv[1] for root, dirs, files in os.walk(cmmc_root): for file in files: - if not (file.endswith('.hpp') or file.endswith('.cpp') or file.startswith('LICENSE') or file.startswith('Makefile')): + if not (file.endswith('.yml') or file.endswith('.jinja2') or file.endswith('.py') or file.endswith('.hpp') or file.endswith('.cpp') or file.startswith('LICENSE') or file.startswith('Makefile')): os.remove(root + '/' + file) diff --git a/tests/SysY2022/functional/66_exgcd.riscv.s b/tests/SysY2022/functional/66_exgcd.riscv.s index 70e2bbafe..e63ce841a 100644 --- a/tests/SysY2022/functional/66_exgcd.riscv.s +++ b/tests/SysY2022/functional/66_exgcd.riscv.s @@ -14,11 +14,11 @@ exgcd: sd s2, 32(sp) mv s2, a3 sd s4, 40(sp) - bne a1, zero, label3 + bne a1, zero, label6 li a2, 1 mv a1, zero sw a2, 0(s3) -label9: +label3: sw a1, 0(s2) ld ra, 0(sp) ld s0, 8(sp) @@ -28,9 +28,9 @@ label9: ld s4, 40(sp) addi sp, sp, 48 ret -label3: +label6: remw s4, s0, s1 - bne s4, zero, label8 + bne s4, zero, label11 li a2, 1 mv a0, s1 sw a2, 0(s3) @@ -42,8 +42,8 @@ label3: lw a4, 0(s2) mulw a5, a3, a4 subw a1, a2, a5 - j label9 -label8: + j label3 +label11: remw a1, s1, s4 mv a0, s4 mv a2, s3 @@ -64,7 +64,7 @@ label8: lw a4, 0(s2) mulw a5, a3, a4 subw a1, a2, a5 - j label9 + j label3 .p2align 2 .globl main main: diff --git a/tests/SysY2022/functional/75_max_flow.riscv.s b/tests/SysY2022/functional/75_max_flow.riscv.s index 8b72f4138..167098ce7 100644 --- a/tests/SysY2022/functional/75_max_flow.riscv.s +++ b/tests/SysY2022/functional/75_max_flow.riscv.s @@ -34,16 +34,34 @@ dfs: sd a1, 136(sp) sd a4, 128(sp) sd a0, 120(sp) - beq a0, a1, label57 -pcrel379: + bne a0, a1, label4 + mv a0, a2 +label2: + ld ra, 0(sp) + ld s0, 8(sp) + ld s5, 16(sp) + ld s1, 24(sp) + ld s6, 32(sp) + ld s4, 40(sp) + ld s3, 48(sp) + ld s2, 56(sp) + ld s10, 64(sp) + ld s9, 72(sp) + ld s8, 80(sp) + ld s7, 88(sp) + ld s11, 96(sp) + addi sp, sp, 216 + ret +label4: auipc a1, %pcrel_hi(to) pcrel380: auipc a4, %pcrel_hi(cap) - addi a2, a1, %pcrel_lo(pcrel379) + addi a2, a1, %pcrel_lo(label4) addi a3, a4, %pcrel_lo(pcrel380) sd a2, 160(sp) + ld a5, 120(sp) sd a3, 112(sp) - sh2add a0, a0, a0 + sh2add a0, a5, a5 ld a4, 128(sp) sh3add a1, a0, a2 sh2add t0, a5, a4 @@ -54,7 +72,6 @@ pcrel380: sd a1, 168(sp) sd zero, 184(sp) mv a1, zero - ld a5, 120(sp) ld a3, 144(sp) sh2add a0, a5, a3 lw a2, 0(a0) @@ -153,163 +170,138 @@ label21: sh2add a1, s5, a4 li a0, 1 sw a0, 0(a1) -label25: ld a3, 144(sp) - sh2add a1, s5, a3 - lw a0, 0(a1) - blt s6, a0, label29 + sh2add a0, s5, a3 + lw a1, 0(a0) + blt zero, a1, label29 j label158 -label45: +label30: addiw s6, s6, 1 addi s3, s3, 4 addi s2, s2, 4 ld a3, 144(sp) - sh2add a1, s5, a3 - lw a0, 0(a1) - bge s6, a0, label158 + sh2add a0, s5, a3 + lw a1, 0(a0) + bge s6, a1, label158 label29: lw s10, 0(s3) ld a4, 128(sp) sh2add a1, s10, a4 lw a0, 0(a1) - bne a0, zero, label45 + bne a0, zero, label30 lw a0, 0(s2) - ble a0, zero, label45 + ble a0, zero, label30 min s9, s4, a0 ld a1, 136(sp) - bne a1, s10, label32 - mv a0, s9 -label42: - bgt a0, zero, label44 - addiw s6, s6, 1 - addi s3, s3, 4 - addi s2, s2, 4 - j label25 -label57: - ld a2, 152(sp) - mv a0, a2 -label2: - ld ra, 0(sp) - ld s0, 8(sp) - ld s5, 16(sp) - ld s1, 24(sp) - ld s6, 32(sp) - ld s4, 40(sp) - ld s3, 48(sp) - ld s2, 56(sp) - ld s10, 64(sp) - ld s9, 72(sp) - ld s8, 80(sp) - ld s7, 88(sp) - ld s11, 96(sp) - addi sp, sp, 216 - ret -label44: - lw a3, 0(s2) - sh2add a1, s5, s5 - mv s4, a0 - subw a2, a3, a0 - sw a2, 0(s2) - ld a3, 192(sp) - lw a2, 0(s3) - sh3add a5, a1, a3 - sh2add a4, s6, a5 - sh2add a5, a2, a2 - lw a1, 0(a4) - ld a3, 112(sp) - sh3add a4, a5, a3 - sh2add a2, a1, a4 - lw t0, 0(a2) - addw a3, a0, t0 - sw a3, 0(a2) -label46: - ble s4, zero, label250 - lw a0, 0(s0) - subw a3, a0, s4 - sw a3, 0(s0) - ld a2, 200(sp) - ld a3, 192(sp) - sh2add a1, a2, a2 - ld a0, 104(sp) - sh3add a5, a1, a3 - sh2add a2, s1, a5 - lw a4, 0(a0) - sh2add a5, a4, a4 - lw a1, 0(a2) - ld a3, 112(sp) - sh3add a0, a5, a3 - sh2add a2, a1, a0 - mv a0, s4 - lw a4, 0(a2) - addw a3, s4, a4 - sw a3, 0(a2) - j label13 -label32: + beq a1, s10, label180 sh2add a1, s10, s10 ld a2, 160(sp) mv s11, zero sh3add s8, a1, a2 ld a3, 112(sp) - ld a4, 128(sp) sh3add s7, a1, a3 sh2add a0, s10, a4 li a1, 1 sw a1, 0(a0) +label34: ld a3, 144(sp) sh2add a1, s10, a3 lw a2, 0(a1) - blt zero, a2, label37 - j label189 -label38: + blt s11, a2, label38 + j label192 +label42: addiw s11, s11, 1 addi s8, s8, 4 addi s7, s7, 4 ld a3, 144(sp) sh2add a1, s10, a3 lw a2, 0(a1) - bge s11, a2, label189 -label37: + bge s11, a2, label192 +label38: lw a0, 0(s8) ld a4, 128(sp) sh2add a2, a0, a4 lw a1, 0(a2) - bne a1, zero, label38 + bne a1, zero, label42 lw a1, 0(s7) - ble a1, zero, label38 + ble a1, zero, label42 min a2, s9, a1 ld a1, 136(sp) ld a3, 144(sp) jal dfs - ble a0, zero, label38 + ble a0, zero, label211 lw a2, 0(s7) sh2add a1, s10, s10 subw a4, a2, a0 sw a4, 0(s7) ld a3, 192(sp) lw a2, 0(s8) - sh3add a5, a1, a3 - sh2add a4, s11, a5 - sh2add a5, a2, a2 - lw a1, 0(a4) + sh3add a4, a1, a3 + sh2add a5, s11, a4 + sh2add a4, a2, a2 + lw a1, 0(a5) ld a3, 112(sp) - sh3add t0, a5, a3 + sh3add t0, a4, a3 sh2add a2, a1, t0 + lw a5, 0(a2) + addw a3, a0, a5 + sw a3, 0(a2) + j label43 +label180: + mv a0, s9 +label43: + ble a0, zero, label30 + lw a2, 0(s2) + sh2add a1, s5, s5 + mv s4, a0 + subw a4, a2, a0 + sw a4, 0(s2) + ld a3, 192(sp) + lw a2, 0(s3) + sh3add a4, a1, a3 + sh2add a5, s6, a4 + sh2add a4, a2, a2 + lw a1, 0(a5) + ld a3, 112(sp) + sh3add a5, a4, a3 + sh2add a2, a1, a5 + lw t0, 0(a2) + addw a3, a0, t0 + sw a3, 0(a2) +label46: + ble s4, zero, label250 + lw a0, 0(s0) + subw a3, a0, s4 + sw a3, 0(s0) + ld a2, 200(sp) + ld a3, 192(sp) + sh2add a1, a2, a2 + ld a0, 104(sp) + sh3add a5, a1, a3 + sh2add a2, s1, a5 + lw a4, 0(a0) + sh2add a5, a4, a4 + lw a1, 0(a2) + ld a3, 112(sp) + sh3add a0, a5, a3 + sh2add a2, a1, a0 + mv a0, s4 lw a4, 0(a2) - addw a3, a0, a4 + addw a3, s4, a4 sw a3, 0(a2) - j label42 + j label13 label73: mv a0, zero j label2 +label158: + mv s4, zero + j label46 label127: mv a0, zero j label13 -label189: +label192: mv a0, zero - j label42 -label158: - mv s4, zero - j label46 + j label43 label250: addiw s1, s1, 1 addi s0, s0, 4 @@ -333,12 +325,17 @@ label15: sh2add t0, a5, a5 lw a2, 0(a4) ld a3, 112(sp) - sh3add t1, t0, a3 - sh2add a1, a2, t1 - lw a4, 0(a1) - addw a3, a0, a4 + sh3add a4, t0, a3 + sh2add a1, a2, a4 + lw a5, 0(a1) + addw a3, a0, a5 sw a3, 0(a1) j label2 +label211: + addiw s11, s11, 1 + addi s8, s8, 4 + addi s7, s7, 4 + j label34 .p2align 2 .globl main main: @@ -384,9 +381,9 @@ label384: jal getint mv s9, a0 jal getint - addiw s6, s6, -1 sh2add a2, s8, s8 lw a1, 0(s7) + addiw s6, s6, -1 sh3add a4, a2, s4 sh3add a5, a2, s3 sh2add a3, a1, a5 @@ -415,7 +412,7 @@ label384: sw a1, 0(a0) bgt s6, zero, label384 label407: - mv t2, zero + mv t1, zero label386: sd zero, 128(sp) li a0, 1 @@ -429,7 +426,7 @@ label386: mv a4, s2 jal dfs bne a0, zero, label469 - mv a0, t2 + mv a0, t1 jal putint li a0, 10 jal putch @@ -448,5 +445,5 @@ label386: addi sp, sp, 168 ret label469: - addw t2, t2, a0 + addw t1, t1, a0 j label386 diff --git a/tests/SysY2022/functional/93_nested_calls.riscv.s b/tests/SysY2022/functional/93_nested_calls.riscv.s index 7e6cff0b1..75934ffd6 100644 --- a/tests/SysY2022/functional/93_nested_calls.riscv.s +++ b/tests/SysY2022/functional/93_nested_calls.riscv.s @@ -30,16 +30,16 @@ label2: jal getint addiw s6, s6, 1 sw a0, 0(s4) - bge s6, s5, label5 + bge s6, s5, label6 addi s4, s4, 4 j label2 -label5: +label6: sltiu a0, s0, 1 sltu a1, zero, s1 and a2, a0, a1 - beq s2, zero, label6 + beq s2, zero, label7 remw a2, a2, s2 -label6: +label7: ld a1, 64(sp) ld t0, 72(sp) srai a5, a1, 32 @@ -76,12 +76,9 @@ label161: label163: addiw a6, a7, 1 srai t3, t5, 32 - bne t3, zero, label22 - mv a2, t4 - j label8 -label22: + beq t3, zero, label79 remw a2, t4, t3 -label8: +label9: ld a7, 96(sp) srai t4, a7, 32 sext.w t5, a7 @@ -97,10 +94,10 @@ label165: bne a0, zero, label167 mv s4, s5 label167: - mulw s5, a2, s4 + mulw s6, a2, s4 mv a7, a4 bne a6, zero, label169 - mv a7, s5 + mv a7, s6 label169: sltiu s4, s2, 1 mv a2, s4 @@ -109,9 +106,9 @@ label169: mv a2, a6 label171: addiw t6, a2, 1 - beq t6, zero, label11 + beq t6, zero, label12 remw s1, s1, t6 -label11: +label12: addw t6, a7, s1 mv a2, a7 beq s1, zero, label173 @@ -124,19 +121,19 @@ label173: mv a6, a7 label175: mulw a2, t6, a6 - beq a3, zero, label13 + beq a3, zero, label14 remw a2, a2, a3 -label13: +label14: subw a3, t1, t0 mv a4, t1 beq t0, zero, label177 mv a4, a3 label177: addiw a3, a4, 1 - beq t2, zero, label16 + beq t2, zero, label17 subw a4, zero, t2 remw a3, a3, a4 -label16: +label17: sltiu t0, t5, 1 mv a4, t3 subw a5, t3, t0 @@ -144,10 +141,10 @@ label16: mv a4, a5 label179: mulw a3, a3, a4 - beq t4, zero, label18 + beq t4, zero, label19 subw a4, zero, t4 remw a3, a3, a4 -label18: +label19: addw a5, s0, a3 mv a4, a3 bne a0, zero, label181 @@ -170,3 +167,6 @@ label183: ld s3, 56(sp) addi sp, sp, 104 ret +label79: + mv a2, t4 + j label9 diff --git a/tests/SysY2022/hidden_functional/09_BFS.riscv.s b/tests/SysY2022/hidden_functional/09_BFS.riscv.s index 3b135eee8..a7981be85 100644 --- a/tests/SysY2022/hidden_functional/09_BFS.riscv.s +++ b/tests/SysY2022/hidden_functional/09_BFS.riscv.s @@ -36,78 +36,81 @@ main: sd s10, 88(sp) sd s11, 96(sp) jal getch + li s3, 10 li s1, -1 -pcrel934: + addiw a1, a0, -48 +pcrel935: auipc a2, %pcrel_hi(que) - li s3, 10 li s4, 9 -pcrel935: +pcrel936: auipc a3, %pcrel_hi(to) li s2, 1 - addiw a1, a0, -48 - addi t1, a2, %pcrel_lo(pcrel934) - addi t0, a3, %pcrel_lo(pcrel935) -pcrel936: + addi t1, a2, %pcrel_lo(pcrel935) + addi t0, a3, %pcrel_lo(pcrel936) +pcrel937: auipc a2, %pcrel_hi(next) sd t1, 104(sp) -pcrel937: +pcrel938: auipc a3, %pcrel_hi(inq) - addi a5, a2, %pcrel_lo(pcrel936) - addi s0, a3, %pcrel_lo(pcrel937) + addi a5, a2, %pcrel_lo(pcrel937) + addi s0, a3, %pcrel_lo(pcrel938) sd t0, 112(sp) sd a5, 120(sp) - bleu a1, s4, label2 + bleu a1, s4, label3 .p2align 2 -label102: +label2: jal getch addiw a1, a0, -48 - bgtu a1, s4, label102 -label2: + bgtu a1, s4, label2 +label3: addiw a1, a0, -48 - bgeu a1, s3, label4 + bgeu a1, s3, label6 .p2align 2 -label101: +label5: jal getch addiw a1, a0, -48 - bltu a1, s3, label101 -label4: + bltu a1, s3, label5 +label6: jal getch addiw a1, a0, -48 - bleu a1, s4, label125 + bleu a1, s4, label137 mv s5, a0 mv s6, zero + j label7 .p2align 2 -label5: +label146: + mv s5, a0 + mv s6, a1 +.p2align 2 +label7: jal getch - li a3, 45 li a1, 1 + li a3, 45 addiw a2, a0, -48 beq s5, a3, label847 mv a1, s6 label847: - bleu a2, s4, label133 - mv s5, a0 - mv s6, a1 - j label5 -label133: - mv s6, a0 + bgtu a2, s4, label146 mv s5, a1 -label8: - addiw a0, s6, -48 - bgeu a0, s3, label138 +label10: + addiw a2, a0, -48 + bgeu a2, s3, label150 + mv s6, a0 mv s7, zero + j label100 +.p2align 2 +label525: + mv s6, a0 .p2align 2 -label11: +label100: jal getch - sh2add a4, s7, s7 + sh2add a3, s7, s7 addiw a1, a0, -48 - slliw a3, a4, 1 - addi a2, a3, -48 - addw s7, s6, a2 - bgeu a1, s3, label14 - mv s6, a0 - j label11 -label14: + slliw a2, a3, 1 + addi a4, a2, -48 + addw s7, s6, a4 + bltu a1, s3, label525 +label13: subw a0, zero, s7 mv s6, a0 bne s5, zero, label849 @@ -117,12 +120,12 @@ label849: mv a2, zero addi s5, a1, %pcrel_lo(label849) mv a0, s5 - j label16 + j label15 .p2align 2 -label19: +label18: addi a0, a0, 256 .p2align 2 -label16: +label15: li a4, -1 addiw a2, a2, 64 slli a3, a4, 32 @@ -160,7 +163,7 @@ label16: sd a1, 232(a0) sd a1, 240(a0) sd a1, 248(a0) - blt a2, a3, label19 + blt a2, a3, label18 li a3, -1 slli a2, a3, 32 add.uw a1, s1, a2 @@ -187,80 +190,108 @@ label16: sd a1, 416(a0) sd a1, 424(a0) sw s1, 432(a0) - beq s6, zero, label21 + beq s6, zero, label99 mv s7, zero - j label22 + j label20 .p2align 2 -label66: +label89: addi a2, a2, 4 .p2align 2 -label63: +label86: lw a4, 0(a2) addiw a3, a3, 1 sh2add a5, a4, s0 sw zero, 0(a5) - bgt a0, a3, label66 + bgt a0, a3, label89 .p2align 2 -label53: +label76: mv a0, a1 jal putint mv a0, s3 jal putch addiw s6, s6, -1 - beq s6, zero, label21 + beq s6, zero, label99 .p2align 2 -label22: +label20: jal getch - xori a2, a0, 85 - xori a3, a0, 81 - sltu a4, zero, a2 - sltu a1, zero, a3 - and a3, a1, a4 - bne a3, zero, label22 + xori a4, a0, 85 + xori a2, a0, 81 + sltu a3, zero, a4 + sltu a1, zero, a2 + and a2, a1, a3 + bne a2, zero, label20 li a1, 81 - bne a0, a1, label78 + bne a0, a1, label26 jal getch addiw a1, a0, -48 + bleu a1, s4, label371 mv s8, a0 - bleu a1, s4, label287 mv s9, zero - j label27 + j label50 .p2align 2 -label296: +label26: + jal getch + addiw a1, a0, -48 + bleu a1, s4, label295 mv s8, a0 - mv s9, a2 + mv s9, zero .p2align 2 label27: jal getch - li a3, 45 li a2, 1 + li a3, 45 addiw a1, a0, -48 beq s8, a3, label853 mv a2, s9 label853: - bgtu a1, s4, label296 + bleu a1, s4, label303 mv s8, a0 mv s9, a2 - addiw a0, a0, -48 - bgeu a0, s3, label886 + j label27 +.p2align 2 +label380: + mv s8, a0 + mv s9, a2 +.p2align 2 +label50: + jal getch + li a2, 1 + li a3, 45 + addiw a1, a0, -48 + beq s8, a3, label861 + mv a2, s9 +label861: + bgtu a1, s4, label380 + mv s9, a2 + addiw a1, a0, -48 + bgeu a1, s3, label890 .p2align 2 -label301: +label385: + mv s8, a0 mv s10, zero - j label73 + j label96 .p2align 2 -label432: +label303: mv s8, a0 + mv s9, a2 + addiw a0, a0, -48 + bgeu a0, s3, label886 +.p2align 2 +label309: + mv s10, zero .p2align 2 -label73: +label33: jal getch - sh2add a2, s10, s10 + sh2add a3, s10, s10 addiw a1, a0, -48 - slliw a4, a2, 1 - addi a3, a4, -48 - addw s10, s8, a3 - bltu a1, s3, label432 + slliw a4, a3, 1 + addi a2, a4, -48 + addw s10, s8, a2 + bgeu a1, s3, label36 + mv s8, a0 + j label33 .p2align 2 -label33: +label36: jal getch subw a2, zero, s10 addiw a1, a0, -48 @@ -268,47 +299,133 @@ label33: bne s9, zero, label855 mv s8, s10 label855: - bleu a1, s4, label309 + bleu a1, s4, label326 mv s9, a0 mv s10, zero - j label70 .p2align 2 -label423: +label38: + jal getch + li a2, 1 + li a3, 45 + addiw a1, a0, -48 + beq s9, a3, label857 + mv a2, s10 +label857: + bleu a1, s4, label334 + mv s9, a0 + mv s10, a2 + j label38 +.p2align 2 +label334: + mv s9, a2 + addiw a1, a0, -48 + bgeu a1, s3, label887 +.p2align 2 +label340: + mv s10, a0 + mv s11, zero +.p2align 2 +label44: + jal getch + sh2add a3, s11, s11 + addiw a1, a0, -48 + slliw a4, a3, 1 + addi a2, a4, -48 + addw s11, s10, a2 + bgeu a1, s3, label348 + mv s10, a0 + j label44 +.p2align 2 +label516: + mv s8, a0 +.p2align 2 +label96: + jal getch + sh2add a3, s10, s10 + addiw a1, a0, -48 + slliw a4, a3, 1 + addi a2, a4, -48 + addw s10, s8, a2 + bltu a1, s3, label516 +.p2align 2 +label56: + jal getch + subw a2, zero, s10 + addiw a1, a0, -48 + mv s8, a2 + bne s9, zero, label863 + mv s8, s10 +label863: + bleu a1, s4, label393 + mv s9, a0 + mv s10, zero + j label93 +.p2align 2 +label507: mv s9, a0 mv s10, a2 .p2align 2 -label70: +label93: jal getch li a2, 1 li a3, 45 addiw a1, a0, -48 - beq s9, a3, label861 + beq s9, a3, label869 mv a2, s10 -label861: - bgtu a1, s4, label423 +label869: + bgtu a1, s4, label507 mv s9, a2 addiw a1, a0, -48 - bgeu a1, s3, label314 + bgeu a1, s3, label398 .p2align 2 -label315: +label399: mv s10, a0 mv s11, zero - j label67 .p2align 2 -label415: - mv s10, a0 -.p2align 2 -label67: +label90: jal getch sh2add a4, s11, s11 addiw a1, a0, -48 slliw a2, a4, 1 addi a3, a2, -48 addw s11, s10, a3 - bltu a1, s3, label415 + bgeu a1, s3, label498 + mv s10, a0 + j label90 +.p2align 2 +label348: subw a1, zero, s11 mv a0, a1 - bne s9, zero, label857 + bne s9, zero, label859 +.p2align 2 +label888: + mv a0, s11 +.p2align 2 +label859: + ld t0, 112(sp) + sh2add a4, s8, s5 + sh2add a3, a0, s5 + addiw s6, s6, -1 + sh2add a1, s7, t0 + sw a0, 0(a1) + lw t0, 0(a4) + ld a5, 120(sp) + sh2add a2, s7, a5 + sw t0, 0(a2) + sw s7, 0(a4) + sw s8, 4(a1) + addiw a1, s7, 1 + lw a0, 0(a3) + addiw s7, s7, 2 + sw a0, 4(a2) + sw a1, 0(a3) + bne s6, zero, label20 + j label99 +.p2align 2 +label498: + subw a1, zero, s11 + mv a0, a1 + bne s9, zero, label865 mv a0, s11 sh2add a1, s8, s0 mv a4, s2 @@ -320,220 +437,109 @@ label67: sh2add t0, a2, t1 sw s8, 4(t1) lw a5, 0(t0) - beq s11, a5, label859 + beq s11, a5, label867 mv a1, zero sh2add t0, a5, s5 lw a3, 0(t0) - bne a3, s1, label44 -label333: - blt a2, a4, label360 - j label359 + bne a3, s1, label67 +label417: + blt a2, a4, label444 + j label443 .p2align 2 -label47: +label72: + addiw a4, a4, 1 + sh2add t2, a5, s0 + ld t1, 104(sp) + sw s2, 0(t2) + sh2add t0, a4, t1 + sw a5, 0(t0) ld a5, 120(sp) sh2add t0, a3, a5 lw a3, 0(t0) - beq a3, s1, label350 + beq a3, s1, label893 .p2align 2 -label44: +label67: ld t0, 112(sp) sh2add t1, a3, t0 lw a5, 0(t1) sh2add t2, a5, s0 lw t0, 0(t2) - bne t0, zero, label47 - addiw a4, a4, 1 - ld t1, 104(sp) - sw s2, 0(t2) - sh2add t0, a4, t1 - sw a5, 0(t0) + beq t0, zero, label72 ld a5, 120(sp) sh2add t0, a3, a5 lw a3, 0(t0) - bne a3, s1, label44 - blt a2, a4, label360 - blt a4, zero, label53 + bne a3, s1, label67 + blt a2, a4, label444 + bge a4, zero, label77 + j label76 .p2align 2 -label54: +label893: + bge a2, a4, label901 +.p2align 2 +label444: + mv a3, a1 + addiw a2, a2, 1 + ld t1, 104(sp) + li a1, 1 + sh2add t0, a2, t1 + lw a5, 0(t0) + beq a0, a5, label867 + mv a1, a3 + sh2add t0, a5, s5 + lw a3, 0(t0) + bne a3, s1, label67 + blt a2, a4, label444 +label443: + bge a4, zero, label77 + j label76 +.p2align 2 +label901: + blt a4, zero, label76 +.p2align 2 +label77: addiw a0, a4, 1 li a2, 3 - ble a0, a2, label368 + ble a0, a2, label452 ld t1, 104(sp) mv a2, zero .p2align 2 -label56: +label79: lw a5, 0(t1) addiw a2, a2, 4 sh2add a3, a5, s0 sw zero, 0(a3) lw t0, 4(t1) - sh2add t2, t0, s0 - sw zero, 0(t2) - lw a5, 8(t1) - sh2add t0, a5, s0 + sh2add a5, t0, s0 + sw zero, 0(a5) + lw t2, 8(t1) + sh2add t0, t2, s0 sw zero, 0(t0) lw a3, 12(t1) sh2add a5, a3, s0 addiw a3, a4, -2 sw zero, 0(a5) - bge a2, a3, label389 + bge a2, a3, label473 addi t1, t1, 16 - j label56 -.p2align 2 -label350: - bge a2, a4, label888 -.p2align 2 -label360: - mv a3, a1 - addiw a2, a2, 1 - ld t1, 104(sp) - li a1, 1 - sh2add t0, a2, t1 - lw a5, 0(t0) - beq a0, a5, label859 - mv a1, a3 - sh2add t0, a5, s5 - lw a3, 0(t0) - bne a3, s1, label44 - blt a2, a4, label360 -label359: - bge a4, zero, label54 - j label53 -.p2align 2 -label78: - jal getch - addiw a1, a0, -48 - bleu a1, s4, label442 - mv s8, a0 - mv s9, zero j label79 .p2align 2 -label451: - mv s8, a0 - mv s9, a2 -.p2align 2 -label79: - jal getch - li a2, 1 - li a3, 45 - addiw a1, a0, -48 - beq s8, a3, label863 - mv a2, s9 -label863: - bgtu a1, s4, label451 - mv s9, a2 - addiw a1, a0, -48 - bgeu a1, s3, label896 -.p2align 2 -label456: - mv s8, a0 - mv s10, zero - j label85 -.p2align 2 -label465: - mv s8, a0 -.p2align 2 -label85: - jal getch - sh2add a4, s10, s10 - addiw a1, a0, -48 - slliw a3, a4, 1 - addi a2, a3, -48 - addw s10, s8, a2 - bltu a1, s3, label465 -.p2align 2 -label88: - jal getch - subw a2, zero, s10 - addiw a1, a0, -48 - mv s8, a2 - bne s9, zero, label865 - mv s8, s10 -label865: - bleu a1, s4, label473 - mv s9, a0 - mv s10, zero - j label90 -.p2align 2 -label482: - mv s9, a0 - mv s10, a2 -.p2align 2 -label90: - jal getch - li a2, 1 - li a3, 45 - addiw a1, a0, -48 - beq s9, a3, label867 - mv a2, s10 -label867: - bgtu a1, s4, label482 - mv s9, a2 - addiw a1, a0, -48 - bgeu a1, s3, label897 -.p2align 2 -label487: - mv s10, a0 - mv s11, zero -.p2align 2 -label96: - jal getch - sh2add a3, s11, s11 - addiw a1, a0, -48 - slliw a2, a3, 1 - addi a4, a2, -48 - addw s11, s10, a4 - bgeu a1, s3, label495 - mv s10, a0 - j label96 -.p2align 2 -label495: - subw a1, zero, s11 - mv a0, a1 - bne s9, zero, label869 -.p2align 2 -label898: - mv a0, s11 -.p2align 2 -label869: - ld t0, 112(sp) - sh2add a3, s8, s5 - sh2add a4, a0, s5 - addiw s6, s6, -1 - sh2add a1, s7, t0 - sw a0, 0(a1) - addiw a0, s7, 1 - lw t0, 0(a3) - ld a5, 120(sp) - sh2add a2, s7, a5 - sw t0, 0(a2) - sw s7, 0(a3) - addiw s7, s7, 2 - sw s8, 4(a1) - lw a5, 0(a4) - sw a5, 4(a2) - sw a0, 0(a4) - bne s6, zero, label22 - j label21 -.p2align 2 -label389: +label473: mv a3, a2 - ble a0, a2, label53 + ble a0, a2, label76 .p2align 2 -label62: +label85: ld t1, 104(sp) sh2add a2, a3, t1 - j label63 -label368: + j label86 +label452: mv a3, zero - bgt a0, zero, label62 - j label53 -label314: + bgt a0, zero, label85 + j label76 +label398: mv s11, zero mv a1, zero mv a0, zero - bne s9, zero, label857 -label857: + bne s9, zero, label865 +label865: sh2add a1, s8, s0 mv a4, s2 mv a3, zero @@ -544,14 +550,14 @@ label857: li a1, 1 sh2add t0, a2, t1 lw a5, 0(t0) - beq a0, a5, label859 + beq a0, a5, label867 mv a1, zero -label859: +label867: sh2add t0, a5, s5 lw a3, 0(t0) - bne a3, s1, label44 - j label333 -label21: + bne a3, s1, label67 + j label417 +label99: mv a0, zero ld ra, 0(sp) ld s0, 8(sp) @@ -568,43 +574,39 @@ label21: ld s11, 96(sp) addi sp, sp, 128 ret -label473: +label326: mv s9, zero addiw a1, a0, -48 - bltu a1, s3, label487 -label897: + bltu a1, s3, label340 +label887: mv s11, zero mv a1, zero mv a0, zero - bne s9, zero, label869 - j label898 -label287: + bne s9, zero, label859 + j label888 +label295: + mv s8, a0 mv s9, zero - addiw a0, s8, -48 - bltu a0, s3, label301 + addiw a0, a0, -48 + bltu a0, s3, label309 label886: mv s10, zero - j label33 -label442: + j label36 +label371: mv s9, zero addiw a1, a0, -48 - bltu a1, s3, label456 -label896: + bltu a1, s3, label385 +label890: mv s10, zero - j label88 -label309: + j label56 +label393: mv s9, zero addiw a1, a0, -48 - bltu a1, s3, label315 - j label314 -.p2align 2 -label888: - bge a4, zero, label54 - j label53 -label125: - mv s6, a0 + bltu a1, s3, label399 + j label398 +label137: mv s5, zero - j label8 -label138: + j label10 +label150: mv s7, zero - j label14 + j label13 diff --git a/tests/SysY2022/hidden_functional/19_search.riscv.s b/tests/SysY2022/hidden_functional/19_search.riscv.s index 73e84ca10..fc0cc139f 100644 --- a/tests/SysY2022/hidden_functional/19_search.riscv.s +++ b/tests/SysY2022/hidden_functional/19_search.riscv.s @@ -9,10 +9,9 @@ a: search: # stack usage: CalleeArg[0] Local[0] RegSpill[64] CalleeSaved[104] addi sp, sp, -168 - mv t0, a0 sd ra, 0(sp) sd s4, 8(sp) - mv s4, a1 + mv s4, a0 sd s3, 16(sp) sd s0, 24(sp) sd s5, 32(sp) @@ -35,7 +34,7 @@ search: sd a7, 104(sp) ble a2, a1, label67 lui a0, 262144 -label56: +label2: ld ra, 0(sp) ld s4, 8(sp) ld s3, 16(sp) @@ -58,140 +57,147 @@ label67: addi s3, a1, %pcrel_lo(label67) ld a3, 160(sp) mv s0, a3 - j label3 -label16: + j label5 +label18: ld a6, 128(sp) lw a3, 0(a6) - beq a2, a3, label17 -label18: + beq s4, a3, label19 +label20: addiw s6, a0, 1 li a3, 2 - blt a1, a3, label19 - beq a2, zero, label19 + blt a2, a3, label23 + beq s4, zero, label23 ld a5, 112(sp) lw a3, 0(a5) - addiw a1, a3, 1 - sltiu a3, s4, 1 - xor a4, a2, a1 - sltiu a0, a4, 1 - or a1, a0, a3 - beq a1, zero, label24 -label19: + addiw a4, a3, 1 + sltiu a3, a1, 1 + xor a2, s4, a4 + sltiu a0, a2, 1 + or a4, a0, a3 + beq a4, zero, label24 +label23: addi s0, s0, 8 mv a0, s6 -label3: +label5: li a1, 4 - bge a0, a1, label55 - ld t0, 152(sp) - ld s4, 144(sp) - slli a1, t0, 4 - sub a2, a1, t0 - mv a1, zero - sh3add s2, a2, s3 - mv a2, t0 + bge a0, a1, label57 + ld a1, 144(sp) + ld s4, 152(sp) + slli a2, a1, 4 + sub a3, a2, a1 + mv a2, zero + sh3add s2, a3, s3 sh2add a5, s4, s2 li a3, 1 lw a4, 0(a5) - bne a4, a3, label13 - j label16 + bne a4, a3, label15 + j label18 label87: - lw a3, 0(s0) - addiw a1, a1, 1 - slli a5, a3, 4 - addw a2, a2, a3 - lw a4, 4(s0) - subw t0, a5, a3 + lw a4, 0(s0) + addiw a2, a2, 1 + lw a3, 4(s0) addw s4, s4, a4 + addw a1, a1, a3 + slli a4, a3, 4 + subw a5, a4, a3 li a3, 1 - sh3add s2, t0, s2 + sh3add s2, a5, s2 sh2add a5, s4, s2 lw a4, 0(a5) - beq a4, a3, label16 + beq a4, a3, label18 .p2align 2 -label13: +label15: ld a6, 128(sp) lw a3, 0(a6) - bne a2, a3, label87 + bne s4, a3, label87 ld a7, 104(sp) lw a3, 0(a7) - beq s4, a3, label16 - lw a3, 0(s0) - addiw a1, a1, 1 - slli a5, a3, 4 - addw a2, a2, a3 - lw a4, 4(s0) - subw t0, a5, a3 + beq a1, a3, label18 + lw a4, 0(s0) + addiw a2, a2, 1 + lw a3, 4(s0) addw s4, s4, a4 + addw a1, a1, a3 + slli a4, a3, 4 + subw a5, a4, a3 li a3, 1 - sh3add s2, t0, s2 + sh3add s2, a5, s2 sh2add a5, s4, s2 lw a4, 0(a5) - bne a4, a3, label13 - j label16 -label17: + bne a4, a3, label15 + j label18 +label19: ld a7, 104(sp) lw a4, 0(a7) - beq s4, a4, label112 - j label18 + beq a1, a4, label112 + j label20 label24: ld a4, 120(sp) - lw a1, 0(a4) - addiw a0, a1, 1 - beq s4, a0, label19 - sh2add a1, s4, s2 - sw zero, 0(a1) + lw a0, 0(a4) + addiw a2, a0, 1 + beq a1, a2, label23 + sh2add a2, s4, s2 + sw zero, 0(a2) lw a0, 0(s0) - lw a1, 4(s0) - subw s7, a2, a0 - subw s8, s4, a1 + lw a3, 4(s0) + subw s7, s4, a0 ld a2, 136(sp) - li a1, 10 + subw s8, a1, a3 addiw a0, a2, 1 - bgt a0, a1, label151 + li a1, 10 + bgt a0, a1, label150 ld a3, 160(sp) lui s9, 262144 mv s10, zero mv s5, a3 - j label29 -label47: + j label28 +label51: + ld a7, 104(sp) + lw a3, 0(a7) + bne a1, a3, label50 +label38: + ld a6, 128(sp) + lw a3, 0(a6) + beq a2, a3, label39 +label40: + addiw s10, s10, 1 + li a3, 2 + blt a4, a3, label43 + bne a2, zero, label42 +label43: addi s5, s5, 8 -label29: +label28: li a0, 4 - bge s10, a0, label53 - slli a2, s7, 4 + bge s10, a0, label52 + slli a1, s8, 4 mv a4, zero - sub a1, a2, s7 - mv a2, s7 - sh3add a0, a1, s3 + sub a2, a1, s8 mv a1, s8 - j label34 -label40: - lw a3, 0(s5) + sh3add a0, a2, s3 + mv a2, s7 + j label33 +label50: + lw a5, 0(s5) addiw a4, a4, 1 - slli t1, a3, 4 - addw a2, a2, a3 - lw a5, 4(s5) - subw t0, t1, a3 - addw a1, a1, a5 - sh3add a0, t0, a0 -label34: - sh2add t0, a1, a0 + lw a3, 4(s5) + addw a2, a2, a5 + slli t0, a3, 4 + addw a1, a1, a3 + subw a5, t0, a3 + sh3add a0, a5, a0 +label33: + sh2add t0, a2, a0 li a3, 1 lw a5, 0(t0) - beq a5, a3, label42 + beq a5, a3, label38 ld a6, 128(sp) lw a3, 0(a6) - bne a2, a3, label40 - ld a7, 104(sp) - lw a3, 0(a7) - bne a1, a3, label40 -label42: - ld a6, 128(sp) - lw a3, 0(a6) - bne a2, a3, label43 + beq a2, a3, label51 + j label50 +label39: ld a7, 104(sp) lw a5, 0(a7) - bne a1, a5, label43 + bne a1, a5, label40 li a3, 1 sh2add a0, s4, s2 addi s0, s0, 8 @@ -199,30 +205,26 @@ label42: sw a3, 0(a0) min s1, s1, a1 mv a0, s6 - j label3 -label43: - addiw s10, s10, 1 - li a3, 2 - blt a4, a3, label47 - beq a2, zero, label47 + j label5 +label42: ld a5, 112(sp) - lw a4, 0(a5) + lw t0, 0(a5) sltiu a5, a1, 1 - addiw t1, a4, 1 - xor t0, a2, t1 - sltiu a3, t0, 1 + addiw a4, t0, 1 + xor t1, a2, a4 + sltiu a3, t1, 1 or a4, a3, a5 - bne a4, zero, label47 + bne a4, zero, label43 ld a4, 120(sp) - lw a5, 0(a4) - addiw a3, a5, 1 - beq a1, a3, label47 - sh2add s11, a1, a0 + lw a3, 0(a4) + addiw a5, a3, 1 + beq a1, a5, label43 + sh2add s11, a2, a0 sw zero, 0(s11) - lw a3, 0(s5) - lw a4, 4(s5) - subw a0, a2, a3 - subw a1, a1, a4 + lw a4, 0(s5) + lw a3, 4(s5) + subw a0, a2, a4 + subw a1, a1, a3 ld a2, 136(sp) ld a7, 104(sp) addiw a2, a2, 2 @@ -236,10 +238,19 @@ label43: addiw a1, a0, 1 sw a3, 0(s11) min s9, s9, a1 - j label29 -label151: + j label28 +label57: + lui a0, 262144 + li a1, 10 + bgt s1, a1, label2 + mv a0, s1 + j label2 +label52: lui a3, 262144 -label26: + li a1, 10 + bgt s9, a1, label53 + mv a3, s9 +label53: addiw a1, a3, 1 sh2add a0, s4, s2 addi s0, s0, 8 @@ -247,110 +258,99 @@ label26: min s1, s1, a1 sw a3, 0(a0) mv a0, s6 - j label3 -label55: - lui a0, 262144 - li a1, 10 - bgt s1, a1, label56 - mv a0, s1 - j label56 -label53: + j label5 +label150: lui a3, 262144 - li a1, 10 - bgt s9, a1, label26 - addiw a1, s9, 1 sh2add a0, s4, s2 - li a3, 1 addi s0, s0, 8 + addiw a1, a3, 1 + li a3, 1 min s1, s1, a1 sw a3, 0(a0) mv a0, s6 - j label3 + j label5 label112: li a3, 1 mv a0, a3 - j label56 + j label2 .p2align 2 .globl main main: # stack usage: CalleeArg[0] Local[48] RegSpill[0] CalleeSaved[104] - addi sp, sp, -152 - li a2, -1 + addi sp, sp, -160 + li a3, -1 + li a2, 1 + zext.w a1, a3 sd ra, 0(sp) - zext.w a0, a2 - sd s2, 8(sp) - addi s2, sp, 144 - sd s4, 16(sp) - addi s4, sp, 148 - sd s5, 24(sp) - addi s5, sp, 104 - sd s0, 32(sp) - li s0, 1 + sd s4, 8(sp) + addi s4, sp, 144 + sd s5, 16(sp) + addi s5, sp, 148 + sd s0, 24(sp) + mv s0, zero + sd s6, 32(sp) + addi s6, sp, 152 sd s1, 40(sp) - zext.w a1, s0 - sd s6, 48(sp) - mv s6, zero + li s1, 1 + sd s7, 48(sp) + zext.w a0, s1 + addi s7, sp, 104 sd s3, 56(sp) - sd s8, 64(sp) + slli s3, a2, 32 + sd s2, 64(sp) + sd s8, 72(sp) mv s8, zero - sd s7, 72(sp) - sd s10, 80(sp) - sd s11, 88(sp) - sd s9, 96(sp) - sd a1, 112(sp) - li a1, 1 - sd a0, 120(sp) - slli s1, a1, 32 - sub a0, zero, s1 - sd s1, 128(sp) + sd s9, 80(sp) + sd s10, 88(sp) + sd s11, 96(sp) + sd a0, 112(sp) + sub a0, zero, s3 + sd a1, 120(sp) + sd s3, 128(sp) sd a0, 136(sp) -pcrel1034: +pcrel1028: auipc a0, %pcrel_hi(a) sw zero, 148(sp) - addi s3, a0, %pcrel_lo(pcrel1034) + addi s2, a0, %pcrel_lo(pcrel1028) sw zero, 144(sp) - j label333 + j label332 .p2align 2 -label1015: - mv s6, s10 +label589: + mv s0, s10 mv s8, s11 .p2align 2 -label343: +label360: addi a3, sp, 112 - addi a4, sp, 108 mv a0, s8 - mv a1, s6 - mv a2, s0 - mv a5, s5 - mv a6, s4 - mv a7, s2 + mv a1, s0 + mv a2, s1 + mv a4, s7 + mv a5, s6 + mv a6, s5 + mv a7, s4 jal search mv a1, a0 li a2, 11 - blt a0, a2, label1001 + blt a0, a2, label998 li a1, -1 -label1001: +label998: mv a0, a1 jal putint li a0, 10 jal putch .p2align 2 -label333: - jal getint - sw a0, 108(sp) - mv s7, a0 +label332: jal getint sw a0, 104(sp) - beq s7, zero, label336 - mv a0, s3 + mv s9, a0 + jal getint + sw a0, 152(sp) + beq s9, zero, label363 + mv a0, s2 mv a2, zero - j label338 .p2align 2 -label341: - addi a0, a0, 480 -.p2align 2 -label338: - ori a1, s1, 1 +label336: + ori a1, s3, 1 addiw a2, a2, 4 li a3, 28 sd a1, 0(a0) @@ -413,7 +413,12 @@ label338: sd a1, 456(a0) sd a1, 464(a0) sd a1, 472(a0) - blt a2, a3, label341 + bge a2, a3, label340 + addi a0, a0, 480 + j label336 +.p2align 2 +label340: + ori a1, s3, 1 sd a1, 480(a0) sd a1, 488(a0) sd a1, 496(a0) @@ -444,89 +449,81 @@ label338: sd a1, 696(a0) sd a1, 704(a0) sd a1, 712(a0) - lw a2, 104(sp) - ble a2, zero, label343 - addi s7, s3, 120 - mv s10, s6 + lw a2, 152(sp) + ble a2, zero, label360 + mv s10, s0 mv s11, s8 - mv s6, s0 - lw a0, 108(sp) - bgt a0, zero, label352 - addiw s6, s0, 1 - lw a0, 104(sp) - ble s6, a0, label367 - j label1015 + mv s0, s1 + j label341 .p2align 2 -label597: - li a1, 3 - bne a0, a1, label1016 - sw s6, 148(sp) +label355: + sw s0, 148(sp) sw s9, 144(sp) addiw s9, s9, 1 - lw a0, 108(sp) + lw a0, 104(sp) bgt s9, a0, label607 .p2align 2 -label363: - addi s8, s8, 4 +label359: + addi s8, s8, 120 .p2align 2 -label353: +label349: jal getint - li a1, 2 - sw a0, 0(s8) - bne a0, a1, label597 + li a2, 2 + sh2add a1, s0, s8 + sw a0, 0(a1) + bne a0, a2, label597 mv s10, s9 - mv s11, s6 - lw a0, 108(sp) - addiw s9, s9, 1 - ble s9, a0, label363 - addiw s6, s6, 1 + mv s11, s0 lw a0, 104(sp) - bgt s6, a0, label1015 -.p2align 2 -label367: - addi s7, s7, 120 - lw a0, 108(sp) - ble a0, zero, label1020 + addiw s9, s9, 1 + ble s9, a0, label359 + addiw s0, s0, 1 + lw a0, 152(sp) + bgt s0, a0, label589 .p2align 2 -label352: - addi s8, s7, 4 - mv s9, s0 - j label353 +label341: + lw a0, 104(sp) + ble a0, zero, label345 + addi s8, s2, 120 + mv s9, s1 + j label349 .p2align 2 -label1016: +label597: + li a1, 3 + beq a0, a1, label355 addiw s9, s9, 1 - lw a0, 108(sp) - ble s9, a0, label363 - addiw s6, s6, 1 lw a0, 104(sp) - ble s6, a0, label367 - j label1015 + ble s9, a0, label359 + addiw s0, s0, 1 + lw a0, 152(sp) + ble s0, a0, label341 + j label589 +.p2align 2 +label345: + addiw s0, s0, 1 + lw a0, 152(sp) + ble s0, a0, label341 + j label589 .p2align 2 label607: - addiw s6, s6, 1 - lw a0, 104(sp) - ble s6, a0, label367 - j label1015 -label336: + addiw s0, s0, 1 + lw a0, 152(sp) + ble s0, a0, label341 + j label589 +label363: mv a0, zero ld ra, 0(sp) - ld s2, 8(sp) - ld s4, 16(sp) - ld s5, 24(sp) - ld s0, 32(sp) + ld s4, 8(sp) + ld s5, 16(sp) + ld s0, 24(sp) + ld s6, 32(sp) ld s1, 40(sp) - ld s6, 48(sp) + ld s7, 48(sp) ld s3, 56(sp) - ld s8, 64(sp) - ld s7, 72(sp) - ld s10, 80(sp) - ld s11, 88(sp) - ld s9, 96(sp) - addi sp, sp, 152 + ld s2, 64(sp) + ld s8, 72(sp) + ld s9, 80(sp) + ld s10, 88(sp) + ld s11, 96(sp) + addi sp, sp, 160 ret -.p2align 2 -label1020: - addiw s6, s6, 1 - lw a0, 104(sp) - ble s6, a0, label367 - j label1015 diff --git a/tests/SysY2022/hidden_functional/19_search.sy.ir b/tests/SysY2022/hidden_functional/19_search.sy.ir index e8bf28b1e..af669124b 100644 --- a/tests/SysY2022/hidden_functional/19_search.sy.ir +++ b/tests/SysY2022/hidden_functional/19_search.sy.ir @@ -1,7 +1,7 @@ internal func @getint() -> i32 { NoMemoryRead NoMemoryWrite }; internal func @putint(i32) -> void { NoMemoryRead NoMemoryWrite }; internal func @putch(i32) -> void { NoMemoryRead NoMemoryWrite }; -internal [30 * [30 * i32]]* @a, align 8 { Flexible }; +internal [30 * [30 * i32]]* @a, align 8 { Flexible Transposed }; internal func @search(i32 %x, i32 %y, i32 %n, [4 * [2 * i32]]* %0, i32* %1, i32* %2, i32* %3, i32* %4) -> i32 { ^entry: i1 %5 = icmp sgt i32 %n, i32 10; @@ -28,8 +28,8 @@ internal func @search(i32 %x, i32 %y, i32 %n, [4 * [2 * i32]]* %0, i32* %1, i32* i32 %16 = phi [^while.header, i32 0] [^b3, i32 %31]; i32 %17 = phi [^while.header, i32 %y] [^b3, i32 %30]; i32 %18 = phi [^while.header, i32 %x] [^b3, i32 %28]; - [30 * i32]* %19 = getelementptr &([30 * [30 * i32]]* %6)[i64 0][i32 %18]; - i32* %20 = getelementptr &([30 * i32]* %19)[i64 0][i32 %17]; + [30 * i32]* %19 = getelementptr &([30 * [30 * i32]]* %6)[i64 0][i32 %17]; + i32* %20 = getelementptr &([30 * i32]* %19)[i64 0][i32 %18]; i32 %21 = load i32* %20; i1 %22 = icmp neq i32 %21, i32 1; cbr i1 %22(prob = 0.984615), ^while.body, ^b2; @@ -106,8 +106,8 @@ internal func @search(i32 %x, i32 %y, i32 %n, [4 * [2 * i32]]* %0, i32* %1, i32* i32 %64 = phi [^while.header2, i32 0] [^b14, i32 %79]; i32 %65 = phi [^while.header2, i32 %50] [^b14, i32 %78]; i32 %66 = phi [^while.header2, i32 %48] [^b14, i32 %76]; - [30 * i32]* %67 = getelementptr &([30 * [30 * i32]]* %6)[i64 0][i32 %66]; - i32* %68 = getelementptr &([30 * i32]* %67)[i64 0][i32 %65]; + [30 * i32]* %67 = getelementptr &([30 * [30 * i32]]* %6)[i64 0][i32 %65]; + i32* %68 = getelementptr &([30 * i32]* %67)[i64 0][i32 %66]; i32 %69 = load i32* %68; i1 %70 = icmp neq i32 %69, i32 1; cbr i1 %70(prob = 0.984615), ^while.body1, ^b13; @@ -194,16 +194,16 @@ func @main() -> i32 { NoRecurse Entry } { [30 * [30 * i32]]* %8 = ptrcast [30 * [30 * i32]]* @a to [30 * [30 * i32]]*; ubr ^b; ^b: - i32 %9 = phi [^entry, i32 0] [^b2, i32 %200]; - i32 %10 = phi [^entry, i32 0] [^b2, i32 %201]; + i32 %9 = phi [^entry, i32 0] [^b4, i32 %200]; + i32 %10 = phi [^entry, i32 0] [^b4, i32 %201]; i32 %11 = call () -> i32 @getint(); store i32* %w with i32 %11; i32 %12 = call () -> i32 @getint(); store i32* %h with i32 %12; i1 %13 = icmp neq i32 %11, i32 0; - cbr i1 %13(prob = 0.984615), ^while.body, ^b1; - ^while.body: - i32 %14 = phi [^b, i32 0] [^while.body, i32 %136]; + cbr i1 %13(prob = 0.984615), ^b1, ^b2; + ^b1: + i32 %14 = phi [^b, i32 0] [^b1, i32 %136]; [30 * i32]* %15 = getelementptr &([30 * [30 * i32]]* %8)[i64 0][i32 %14]; i32* %16 = getelementptr &([30 * i32]* %15)[i64 0][i64 0]; store i32* %16 with i32 1; @@ -447,10 +447,10 @@ func @main() -> i32 { NoRecurse Entry } { store i32* %135 with i32 1; i32 %136 = add i32 %14, i32 4; i1 %137 = icmp slt i32 %136, i32 28; - cbr i1 %137(prob = 0.857143), ^while.body, ^while.body1; - ^b1: + cbr i1 %137(prob = 0.857143), ^b1, ^b3; + ^b2: ret i32 0; - ^while.body1: + ^b3: i32* %138 = getelementptr &([30 * i32]* %15)[i64 4][i64 0]; store i32* %138 with i32 1; i32* %139 = getelementptr &([30 * i32]* %15)[i64 4][i64 1]; @@ -573,52 +573,52 @@ func @main() -> i32 { NoRecurse Entry } { store i32* %197 with i32 1; i32 %198 = load i32* %h; i1 %199 = icmp sgt i32 %198, i32 0; - cbr i1 %199(prob = 0.984615), ^while.body2, ^b2; - ^b2: - i32 %200 = phi [^while.body1, i32 %9] [^b3, i32 %217]; - i32 %201 = phi [^while.body1, i32 %10] [^b3, i32 %218]; + cbr i1 %199(prob = 0.984615), ^while.body, ^b4; + ^b4: + i32 %200 = phi [^b3, i32 %9] [^b5, i32 %217]; + i32 %201 = phi [^b3, i32 %10] [^b5, i32 %218]; i32 %202 = call (i32, i32, i32, [4 * [2 * i32]]*, i32*, i32*, i32*, i32*) -> i32 @search(i32 %201, i32 %200, i32 1, [4 * [2 * i32]]* %step, i32* %w, i32* %h, i32* %x_1, i32* %y_1); i1 %203 = icmp slt i32 %202, i32 11; i32 %204 = select i1 %203 ? i32 %202 : i32 -1; call (i32) -> void @putint(i32 %204); call (i32) -> void @putch(i32 10); ubr ^b; - ^while.body2: - i32 %205 = phi [^while.body1, i32 %9] [^b3, i32 %217]; - i32 %206 = phi [^while.body1, i32 %10] [^b3, i32 %218]; - i32 %207 = phi [^while.body1, i32 1] [^b3, i32 %219]; + ^while.body: + i32 %205 = phi [^b3, i32 %9] [^b5, i32 %217]; + i32 %206 = phi [^b3, i32 %10] [^b5, i32 %218]; + i32 %207 = phi [^b3, i32 1] [^b5, i32 %219]; i32 %208 = load i32* %w; i1 %209 = icmp sgt i32 %208, i32 0; - [30 * i32]* %210 = getelementptr &([30 * [30 * i32]]* %8)[i64 0][i32 %207]; - cbr i1 %209(prob = 0.984615), ^while.body3, ^b3; - ^while.body3: - i32 %211 = phi [^while.body2, i32 %205] [^b4, i32 %222]; - i32 %212 = phi [^while.body2, i32 %206] [^b4, i32 %223]; - i32 %213 = phi [^while.body2, i32 1] [^b4, i32 %224]; - i32 %214 = call () -> i32 @getint(); - i32* %215 = getelementptr &([30 * i32]* %210)[i64 0][i32 %213]; - store i32* %215 with i32 %214; - i1 %216 = icmp eq i32 %214, i32 2; - cbr i1 %216(prob = 0.5), ^b4, ^if.else; - ^b3: - i32 %217 = phi [^while.body2, i32 %205] [^b4, i32 %222]; - i32 %218 = phi [^while.body2, i32 %206] [^b4, i32 %223]; + cbr i1 %209(prob = 0.984615), ^while.body1, ^b5; + ^while.body1: + i32 %210 = phi [^while.body, i32 %205] [^b6, i32 %222]; + i32 %211 = phi [^while.body, i32 %206] [^b6, i32 %223]; + i32 %212 = phi [^while.body, i32 1] [^b6, i32 %224]; + i32 %213 = call () -> i32 @getint(); + [30 * i32]* %214 = getelementptr &([30 * [30 * i32]]* %8)[i64 0][i32 %212]; + i32* %215 = getelementptr &([30 * i32]* %214)[i64 0][i32 %207]; + store i32* %215 with i32 %213; + i1 %216 = icmp eq i32 %213, i32 2; + cbr i1 %216(prob = 0.5), ^b6, ^if.else; + ^b5: + i32 %217 = phi [^while.body, i32 %205] [^b6, i32 %222]; + i32 %218 = phi [^while.body, i32 %206] [^b6, i32 %223]; i32 %219 = add i32 %207, i32 1; i32 %220 = load i32* %h; i1 %221 = icmp sle i32 %219, i32 %220; - cbr i1 %221(prob = 0.984615), ^while.body2, ^b2; - ^b4: - i32 %222 = phi [^while.body3, i32 %213] [^if.else, i32 %211] [^if.then, i32 %211]; - i32 %223 = phi [^while.body3, i32 %207] [^if.else, i32 %212] [^if.then, i32 %212]; - i32 %224 = add i32 %213, i32 1; + cbr i1 %221(prob = 0.984615), ^while.body, ^b4; + ^b6: + i32 %222 = phi [^while.body1, i32 %212] [^if.else, i32 %210] [^if.then, i32 %210]; + i32 %223 = phi [^while.body1, i32 %207] [^if.else, i32 %211] [^if.then, i32 %211]; + i32 %224 = add i32 %212, i32 1; i32 %225 = load i32* %w; i1 %226 = icmp sle i32 %224, i32 %225; - cbr i1 %226(prob = 0.984615), ^while.body3, ^b3; + cbr i1 %226(prob = 0.984615), ^while.body1, ^b5; ^if.else: - i1 %227 = icmp eq i32 %214, i32 3; - cbr i1 %227(prob = 0.5), ^if.then, ^b4; + i1 %227 = icmp eq i32 %213, i32 3; + cbr i1 %227(prob = 0.5), ^if.then, ^b6; ^if.then: store i32* %x_1 with i32 %207; - store i32* %y_1 with i32 %213; - ubr ^b4; + store i32* %y_1 with i32 %212; + ubr ^b6; } diff --git a/tests/SysY2022/hidden_functional/20_sort.riscv.s b/tests/SysY2022/hidden_functional/20_sort.riscv.s index 5bdb98e8f..afc36a70d 100644 --- a/tests/SysY2022/hidden_functional/20_sort.riscv.s +++ b/tests/SysY2022/hidden_functional/20_sort.riscv.s @@ -67,11 +67,11 @@ label423: .p2align 2 label115: jal getch - sh2add a3, s2, s2 + sh2add a2, s2, s2 addiw a1, a0, -48 - slliw a4, a3, 1 - addi a2, a4, -48 - addw s2, s1, a2 + slliw a4, a2, 1 + addi a3, a4, -48 + addw s2, s1, a3 bltu a1, s5, label423 label8: subw a0, zero, s2 @@ -82,151 +82,152 @@ label8: sd s2, 104(sp) label481: ld a4, 104(sp) -pcrel565: +pcrel567: auipc a0, %pcrel_hi(a) -pcrel566: +pcrel568: auipc a1, %pcrel_hi(b) addiw s3, a4, -3 - addi s2, a0, %pcrel_lo(pcrel565) - addi s1, a1, %pcrel_lo(pcrel566) -pcrel567: + addi s2, a0, %pcrel_lo(pcrel567) + addi s1, a1, %pcrel_lo(pcrel568) +pcrel569: auipc a0, %pcrel_hi(c) - addi s0, a0, %pcrel_lo(pcrel567) - beq a4, zero, label10 + addi s0, a0, %pcrel_lo(pcrel569) + beq a4, zero, label26 mv s7, s2 mv s8, zero - j label100 + j label11 .p2align 2 -label414: +label190: mv s10, a0 .p2align 2 -label112: +label23: jal getch sh2add a3, s11, s11 addiw a1, a0, -48 slliw a2, a3, 1 addi a4, a2, -48 addw s11, s10, a4 - bltu a1, s5, label414 + bltu a1, s5, label190 subw a1, zero, s11 mv a0, a1 - bne s9, zero, label495 + bne s9, zero, label485 mv a0, s11 - sh2add a2, s8, s1 - sh2add a1, s8, s0 + sh2add a1, s8, s1 + sh2add a2, s8, s0 sw s11, 0(s7) addiw s8, s8, 1 - sw s11, 0(a2) sw s11, 0(a1) + sw s11, 0(a2) ld a4, 104(sp) - beq a4, s8, label10 + beq a4, s8, label26 .p2align 2 -label111: +label22: addi s7, s7, 4 .p2align 2 -label100: +label11: jal getch addiw a1, a0, -48 - bleu a1, s4, label378 + bleu a1, s4, label154 mv s9, a0 mv s10, zero .p2align 2 -label103: +label14: jal getch li a1, 1 addiw a2, a0, -48 - beq s9, s6, label493 + beq s9, s6, label483 mv a1, s10 -label493: - bleu a2, s4, label386 +label483: + bleu a2, s4, label162 mv s9, a0 mv s10, a1 - j label103 + j label14 .p2align 2 -label386: +label162: mv s9, a1 addiw a2, a0, -48 - bgeu a2, s5, label522 + bgeu a2, s5, label510 .p2align 2 -label392: +label168: mv s10, a0 mv s11, zero - j label112 -label378: + j label23 +label154: mv s9, zero addiw a2, a0, -48 - bltu a2, s5, label392 -label522: + bltu a2, s5, label168 +label510: mv s11, zero mv a1, zero mv a0, zero - bne s9, zero, label495 -label495: + bne s9, zero, label485 +label485: sw a0, 0(s7) - sh2add a2, s8, s1 - sh2add a1, s8, s0 - sw a0, 0(a2) - addiw s8, s8, 1 + sh2add a1, s8, s1 + sh2add a2, s8, s0 sw a0, 0(a1) + addiw s8, s8, 1 + sw a0, 0(a2) ld a4, 104(sp) - bne a4, s8, label111 -label10: + bne a4, s8, label22 +label26: ld a4, 104(sp) - ble a4, zero, label11 + ble a4, zero, label27 mv a0, s2 li a1, 1 - bgt a4, a1, label91 - j label11 + bgt a4, a1, label107 + j label27 .p2align 2 -label521: - addiw a3, a3, 1 +label113: + addi a0, a0, 4 + addiw a1, a1, 1 ld a4, 104(sp) - ble a4, a3, label526 + ble a4, a1, label27 .p2align 2 -label98: - addi a2, a2, 4 +label107: + sh2add a2, a1, s2 + mv a3, a1 lw a4, 0(a0) lw a5, 0(a2) - ble a4, a5, label521 + bgt a4, a5, label111 + addiw a3, a1, 1 + ld a4, 104(sp) + bgt a4, a3, label114 + addi a0, a0, 4 + addiw a1, a1, 1 + bgt a4, a1, label107 + j label27 .p2align 2 -label95: +label111: sw a5, 0(a0) sw a4, 0(a2) addiw a3, a3, 1 ld a4, 104(sp) - bgt a4, a3, label98 - addi a0, a0, 4 - addiw a1, a1, 1 - ble a4, a1, label11 + ble a4, a3, label113 .p2align 2 -label91: - sh2add a2, a1, s2 - mv a3, a1 +label114: + addi a2, a2, 4 lw a4, 0(a0) lw a5, 0(a2) - bgt a4, a5, label95 - addiw a3, a1, 1 + bgt a4, a5, label111 + addiw a3, a3, 1 ld a4, 104(sp) - bgt a4, a3, label98 + bgt a4, a3, label114 addi a0, a0, 4 addiw a1, a1, 1 - bgt a4, a1, label91 -label11: + bgt a4, a1, label107 +label27: auipc a0, %pcrel_hi(cnt) ld a4, 104(sp) - addi a1, a0, %pcrel_lo(label11) - ble a4, zero, label157 + addi a1, a0, %pcrel_lo(label27) + ble a4, zero, label198 li a0, 3 - ble a4, a0, label161 + ble a4, a0, label202 mv a0, s1 li a3, -100 mv a2, zero - j label24 .p2align 2 -label28: - addi a0, a0, 16 -.p2align 2 -label24: +label40: lw a4, 0(a0) addiw a2, a2, 4 sh2add t0, a4, a1 @@ -240,11 +241,11 @@ label24: sw t3, 0(t1) max t3, a3, a4 lw t0, 8(a0) - max t4, a5, t3 sh2add t2, t0, a1 - lw t1, 0(t2) - addi t5, t1, 1 - sw t5, 0(t2) + lw t4, 0(t2) + addi t1, t4, 1 + max t4, a5, t3 + sw t1, 0(t2) lw t1, 12(a0) max t2, t0, t4 sh2add a4, t1, a1 @@ -252,192 +253,197 @@ label24: lw a5, 0(a4) addi t0, a5, 1 sw t0, 0(a4) - bgt s3, a2, label28 + ble s3, a2, label252 + addi a0, a0, 16 + j label40 +label252: mv a5, a3 -label13: +label29: ld a4, 104(sp) - ble a4, a2, label29 + ble a4, a2, label45 sh2add a0, a2, s1 mv a3, a5 - j label18 -label22: + j label34 +label38: addi a0, a0, 4 -label18: +label34: lw t0, 0(a0) addiw a2, a2, 1 max a3, a3, t0 sh2add a4, t0, a1 - lw a5, 0(a4) - addi t1, a5, 1 - sw t1, 0(a4) + lw t1, 0(a4) + addi a5, t1, 1 + sw a5, 0(a4) ld a4, 104(sp) - bgt a4, a2, label22 -label29: + bgt a4, a2, label38 +label45: addiw a2, a3, 1 - blt a3, zero, label31 + blt a3, zero, label47 mv a0, zero mv a5, zero lw a4, 0(a1) - bne a4, zero, label78 - j label84 + bne a4, zero, label97 + j label94 .p2align 2 -label83: +label102: addi a3, a3, 4 .p2align 2 -label79: +label98: sw a0, 0(a3) addiw a4, a4, -1 addiw a5, a5, 1 - bne a4, zero, label83 + bne a4, zero, label102 addiw a0, a0, 1 - ble a2, a0, label31 + ble a2, a0, label47 .p2align 2 -label86: +label96: addi a1, a1, 4 lw a4, 0(a1) - beq a4, zero, label84 + beq a4, zero, label94 .p2align 2 -label78: +label97: sh2add a3, a5, s1 - j label79 -label31: + j label98 +label47: ld a4, 104(sp) - ble a4, zero, label56 + ble a4, zero, label72 mv a1, s0 - mv a5, zero + mv a3, zero li a0, 1 - bgt a4, a0, label39 - j label36 + bgt a4, a0, label52 + j label266 .p2align 2 -label512: - sh2add a2, a5, s0 +label513: + mv a5, a2 lw a3, 0(a1) + sh2add a2, a2, s0 lw t0, 0(a2) sw t0, 0(a1) sw a3, 0(a2) ld a4, 104(sp) - ble a4, a0, label56 + ble a4, a0, label72 .p2align 2 -label38: +label71: addi a1, a1, 4 - mv a5, a0 + mv a3, a0 ld a4, 104(sp) addiw a0, a0, 1 - ble a4, a0, label36 + ble a4, a0, label266 .p2align 2 -label39: - addiw a2, a5, 4 +label52: + addiw a2, a3, 4 ld a4, 104(sp) - ble a4, a2, label238 + ble a4, a2, label271 sh2add a2, a0, s0 + mv a4, a3 mv a3, a0 - sh2add t2, a5, s0 - lw t0, 0(a2) - mv a4, a0 - lw t1, 0(t2) - blt t0, t1, label483 - mv a4, a5 -label483: sh2add t2, a4, s0 - lw t0, 4(a2) - addiw t3, a3, 1 + lw t0, 0(a2) + mv a5, a0 lw t1, 0(t2) - mv a5, t3 - blt t0, t1, label485 + blt t0, t1, label487 mv a5, a4 -label485: +label487: sh2add t2, a5, s0 - lw t0, 8(a2) - addiw t3, a3, 2 + lw t0, 4(a2) + addiw t3, a3, 1 lw t1, 0(t2) mv a4, t3 - blt t0, t1, label487 + blt t0, t1, label489 mv a4, a5 -label487: +label489: sh2add t2, a4, s0 - lw t0, 12(a2) - addiw t3, a3, 3 + lw t0, 8(a2) + addiw t3, a3, 2 lw t1, 0(t2) mv a5, t3 - blt t0, t1, label489 + blt t0, t1, label491 mv a5, a4 -label489: +label491: + sh2add t2, a5, s0 + lw t0, 12(a2) + addiw t3, a3, 3 + lw t1, 0(t2) + mv a4, t3 + blt t0, t1, label493 + mv a4, a5 +label493: addiw a3, a3, 4 - ble s3, a3, label275 + ble s3, a3, label308 .p2align 2 -label45: +label58: addi a2, a2, 16 - sh2add t2, a5, s0 - mv a4, a3 + sh2add t2, a4, s0 + mv a5, a3 lw t0, 0(a2) lw t1, 0(t2) - blt t0, t1, label483 - mv a4, a5 + blt t0, t1, label487 + mv a5, a4 lw t0, 4(a2) addiw t3, a3, 1 lw t1, 0(t2) - mv a5, t3 - blt t0, t1, label485 - mv a5, a4 - sh2add t2, a4, s0 - lw t0, 8(a2) - addiw t3, a3, 2 - lw t1, 0(t2) mv a4, t3 - blt t0, t1, label487 + blt t0, t1, label489 mv a4, a5 sh2add t2, a5, s0 - lw t0, 12(a2) - addiw t3, a3, 3 + lw t0, 8(a2) + addiw t3, a3, 2 lw t1, 0(t2) mv a5, t3 - blt t0, t1, label489 + blt t0, t1, label491 mv a5, a4 + sh2add t2, a4, s0 + lw t0, 12(a2) + addiw t3, a3, 3 + lw t1, 0(t2) + mv a4, t3 + blt t0, t1, label493 + mv a4, a5 addiw a3, a3, 4 - bgt s3, a3, label45 - mv t0, a4 + bgt s3, a3, label58 + mv a2, a5 ld a4, 104(sp) - ble a4, a3, label512 + ble a4, a3, label513 .p2align 2 -label50: +label63: sh2add a2, a3, s0 - mv a4, t0 - sh2add t2, t0, s0 + mv a4, a5 + sh2add t1, a5, s0 lw t0, 0(a2) mv a5, a3 - lw t1, 0(t2) - blt t0, t1, label491 + lw t2, 0(t1) + blt t0, t2, label495 mv a5, a4 -label491: +label495: addiw a3, a3, 1 ld a4, 104(sp) - ble a4, a3, label293 + ble a4, a3, label326 .p2align 2 -label55: +label68: addi a2, a2, 4 mv a4, a5 - sh2add t2, a5, s0 + sh2add t1, a5, s0 lw t0, 0(a2) mv a5, a3 - lw t1, 0(t2) - blt t0, t1, label491 + lw t2, 0(t1) + blt t0, t2, label495 mv a5, a4 addiw a3, a3, 1 ld a4, 104(sp) - bgt a4, a3, label55 + bgt a4, a3, label68 sh2add a2, a5, s0 lw a3, 0(a1) lw t0, 0(a2) sw t0, 0(a1) sw a3, 0(a2) - bgt a4, a0, label38 -label56: + bgt a4, a0, label71 +label72: ld a4, 104(sp) - beq a4, zero, label297 + beq a4, zero, label338 mv a0, s1 mv a1, zero .p2align 2 -label58: +label74: sh2add a3, a1, s2 lw a5, 0(a0) lw a2, 0(a3) @@ -445,55 +451,25 @@ label58: subw a4, a5, a2 addiw a1, a1, 1 sw a4, 0(a0) - lw t1, 0(a3) - subw a5, t1, a4 - subw t0, a5, a2 - sw t0, 0(a3) + lw a5, 0(a3) + subw t0, a5, a4 + subw t1, t0, a2 + sw t1, 0(a3) ld a4, 104(sp) subw a2, a4, a1 - beq a2, zero, label297 + beq a2, zero, label338 addi a0, a0, 4 - j label58 -label297: + j label74 +label338: mv a0, zero -label63: +label79: ld a4, 104(sp) subw a1, a0, a4 - beq a1, zero, label319 + beq a1, zero, label360 lw a1, 0(s1) - bne a1, zero, label325 - lw a1, 0(s0) - beq a1, zero, label329 - li a0, 2 - j label70 -label36: - sh2add a2, a5, s0 - lw a3, 0(a1) - lw t0, 0(a2) - sw t0, 0(a1) - sw a3, 0(a2) - ld a4, 104(sp) - bgt a4, a0, label38 - j label56 -label84: - addiw a0, a0, 1 - bgt a2, a0, label86 - j label31 -label238: - mv t0, a5 - mv a3, a0 - ld a4, 104(sp) - mv a5, zero - bgt a4, a0, label50 - mv a2, s0 - lw a3, 0(a1) - lw t0, 0(s0) - sw t0, 0(a1) - sw a3, 0(s0) - j label56 -label325: + beq a1, zero, label84 li a0, 1 -label70: +label86: ld ra, 0(sp) ld s4, 8(sp) ld s6, 16(sp) @@ -509,47 +485,73 @@ label70: ld s11, 96(sp) addi sp, sp, 112 ret -.p2align 2 -label526: - addi a0, a0, 4 - addiw a1, a1, 1 +label266: + mv a5, a3 + sh2add a2, a3, s0 + lw a3, 0(a1) + lw t0, 0(a2) + sw t0, 0(a1) + sw a3, 0(a2) ld a4, 104(sp) - bgt a4, a1, label91 - j label11 + bgt a4, a0, label71 + j label72 +label94: + addiw a0, a0, 1 + bgt a2, a0, label96 + j label47 +label271: + mv a5, a3 + mv a2, zero + ld a4, 104(sp) + mv a3, a0 + bgt a4, a0, label63 + mv a5, zero + lw a3, 0(a1) + mv a2, s0 + lw t0, 0(s0) + sw t0, 0(a1) + sw a3, 0(s0) + j label72 +label84: + lw a1, 0(s0) + beq a1, zero, label370 + li a0, 2 + j label86 .p2align 2 -label293: +label326: sh2add a2, a5, s0 lw a3, 0(a1) lw t0, 0(a2) sw t0, 0(a1) sw a3, 0(a2) ld a4, 104(sp) - bgt a4, a0, label38 - j label56 -label275: - mv t0, a5 + bgt a4, a0, label71 + j label72 +label134: + mv s2, zero + j label8 +label308: + mv a5, a4 + mv a2, a4 ld a4, 104(sp) - bgt a4, a3, label50 - j label512 -label329: + bgt a4, a3, label63 + j label513 +label370: addiw a0, a0, 1 addi s1, s1, 4 addi s0, s0, 4 - j label63 -label161: - li a5, -100 - mv a2, zero - mv a3, zero - j label13 + j label79 +label360: + li a0, -123 + j label86 label121: mv s0, zero j label5 -label134: - mv s2, zero - j label8 -label157: +label198: li a3, -100 + j label45 +label202: + li a5, -100 + mv a2, zero + mv a3, zero j label29 -label319: - li a0, -123 - j label70 diff --git a/tests/SysY2022/hidden_functional/21_union_find.riscv.s b/tests/SysY2022/hidden_functional/21_union_find.riscv.s index 5fdc460f6..cfd6e2a88 100644 --- a/tests/SysY2022/hidden_functional/21_union_find.riscv.s +++ b/tests/SysY2022/hidden_functional/21_union_find.riscv.s @@ -27,14 +27,14 @@ pcrel143: beq s0, s1, label34 sh2add a1, s1, s3 lw s2, 0(a1) - bne s1, s2, label7 + bne s1, s2, label5 mv a0, s1 sh2add a1, s0, s3 sw s1, 0(a1) - j label2 + j label24 label34: mv a0, s0 -label2: +label24: ld ra, 0(sp) ld s0, 8(sp) ld s5, 16(sp) @@ -47,20 +47,20 @@ label2: ld s8, 72(sp) addi sp, sp, 80 ret -label7: +label5: sh2add a1, s2, s3 lw s4, 0(a1) - bne s2, s4, label8 + bne s2, s4, label6 mv a0, s2 sh2add a1, s1, s3 sw s2, 0(a1) sh2add a1, s0, s3 sw s2, 0(a1) - j label2 -label8: + j label24 +label6: sh2add a1, s4, s3 lw s5, 0(a1) - bne s4, s5, label9 + bne s4, s5, label7 mv a0, s4 sh2add a1, s2, s3 sw s4, 0(a1) @@ -68,11 +68,11 @@ label8: sw s4, 0(a1) sh2add a1, s0, s3 sw s4, 0(a1) - j label2 -label9: + j label24 +label7: sh2add a1, s5, s3 lw s6, 0(a1) - bne s5, s6, label10 + bne s5, s6, label8 mv a0, s5 sh2add a1, s4, s3 sw s5, 0(a1) @@ -82,11 +82,11 @@ label9: sw s5, 0(a1) sh2add a1, s0, s3 sw s5, 0(a1) - j label2 -label10: + j label24 +label8: sh2add a1, s6, s3 lw s7, 0(a1) - bne s6, s7, label11 + bne s6, s7, label9 mv a0, s6 sh2add a1, s5, s3 sw s6, 0(a1) @@ -98,11 +98,31 @@ label10: sw s6, 0(a1) sh2add a1, s0, s3 sw s6, 0(a1) - j label2 -label11: + j label24 +label9: sh2add a1, s7, s3 lw s8, 0(a1) - bne s7, s8, label12 + beq s7, s8, label78 + sh2add a1, s8, s3 + lw a0, 0(a1) + bne s8, a0, label13 + mv a0, s8 + sh2add a1, s7, s3 + sw s8, 0(a1) + sh2add a1, s6, s3 + sw s8, 0(a1) + sh2add a1, s5, s3 + sw s8, 0(a1) + sh2add a1, s4, s3 + sw s8, 0(a1) + sh2add a1, s2, s3 + sw s8, 0(a1) + sh2add a1, s1, s3 + sw s8, 0(a1) + sh2add a1, s0, s3 + sw s8, 0(a1) + j label24 +label78: mv a0, s7 sh2add a1, s6, s3 sw s7, 0(a1) @@ -116,11 +136,8 @@ label11: sw s7, 0(a1) sh2add a1, s0, s3 sw s7, 0(a1) - j label2 -label12: - sh2add a1, s8, s3 - lw a0, 0(a1) - beq s8, a0, label85 + j label24 +label13: jal find sh2add a1, s8, s3 sw a0, 0(a1) @@ -138,24 +155,7 @@ label12: sw a0, 0(a1) sh2add a1, s0, s3 sw a0, 0(a1) - j label2 -label85: - mv a0, s8 - sh2add a1, s7, s3 - sw s8, 0(a1) - sh2add a1, s6, s3 - sw s8, 0(a1) - sh2add a1, s5, s3 - sw s8, 0(a1) - sh2add a1, s4, s3 - sw s8, 0(a1) - sh2add a1, s2, s3 - sw s8, 0(a1) - sh2add a1, s1, s3 - sw s8, 0(a1) - sh2add a1, s0, s3 - sw s8, 0(a1) - j label2 + j label24 .p2align 2 .globl main main: @@ -190,35 +190,35 @@ label159: addi a0, a0, 64 .p2align 2 label156: - addiw a3, a1, 1 + addiw a2, a1, 1 addiw a5, a1, 3 - slli a4, a3, 32 + slli a4, a2, 32 slli t0, a5, 32 - addiw a3, a1, 2 - add.uw a2, a1, a4 - add.uw a4, a3, t0 - sd a2, 0(a0) - addiw a3, a1, 5 - addiw a2, a1, 4 - slli a5, a3, 32 - sd a4, 8(a0) - addiw a3, a1, 6 - add.uw a4, a2, a5 - addiw a2, a1, 7 - sd a4, 16(a0) + addiw a2, a1, 2 + add.uw a3, a1, a4 + add.uw a4, a2, t0 + sd a3, 0(a0) + addiw a2, a1, 5 + addiw a3, a1, 4 slli a5, a2, 32 - addiw a2, a1, 8 + sd a4, 8(a0) + addiw a2, a1, 6 add.uw a4, a3, a5 - addiw a3, a1, 9 - sd a4, 24(a0) - slli a5, a3, 32 + addiw a5, a1, 7 + sd a4, 16(a0) + slli t0, a5, 32 + addiw a4, a1, 8 + add.uw a3, a2, t0 + addiw t0, a1, 9 + sd a3, 24(a0) + slli a5, t0, 32 addiw a3, a1, 10 - add.uw a4, a2, a5 - addiw a2, a1, 11 - sd a4, 32(a0) - slli a5, a2, 32 + add.uw a2, a4, a5 + addiw a5, a1, 11 + sd a2, 32(a0) + slli t0, a5, 32 addiw a2, a1, 12 - add.uw a4, a3, a5 + add.uw a4, a3, t0 addiw a3, a1, 13 sd a4, 40(a0) slli a4, a3, 32 @@ -269,10 +269,10 @@ label167: mv s5, zero j label168 .p2align 2 -label171: +label170: sh2add a1, a0, s1 - addiw s5, s5, 1 sw a2, 0(a1) + addiw s5, s5, 1 ble s4, s5, label172 .p2align 2 label168: @@ -285,7 +285,7 @@ label168: mv a2, a0 mv a0, a3 jal find - bne a2, a0, label171 + bne a2, a0, label170 addiw s5, s5, 1 bgt s4, s5, label168 label172: @@ -327,24 +327,24 @@ label176: sext.w t0, a2 addw a3, a5, t1 addiw a5, a1, 4 - xor t1, t0, a5 + xor t2, t0, a5 srai a5, a2, 32 - sltiu t2, t1, 1 + sltiu t1, t2, 1 ld a2, 24(a0) - addiw t1, a1, 5 - addw a4, a3, t2 - xor t2, a5, t1 + addiw t2, a1, 5 + addw a4, a3, t1 + xor t1, a5, t2 sext.w a5, a2 - sltiu t0, t2, 1 + sltiu t0, t1, 1 + addiw t1, a1, 6 addw a3, a4, t0 - addiw t0, a1, 6 - xor t2, a5, t0 - addiw t0, a1, 7 + xor t0, a5, t1 srai a5, a2, 32 - sltiu t1, t2, 1 + sltiu t2, t0, 1 ld a2, 32(a0) + addiw t0, a1, 7 + addw a4, a3, t2 xor t2, a5, t0 - addw a4, a3, t1 addiw t0, a1, 8 sext.w a5, a2 sltiu t1, t2, 1 @@ -387,10 +387,10 @@ label176: srai a5, a2, 32 sltiu t1, t0, 1 addiw a1, a1, 16 - xor t0, a5, t2 addw a4, a3, t1 - sltiu a3, t0, 1 - addw a2, a4, a3 + xor a3, a5, t2 + sltiu t0, a3, 1 + addw a2, a4, t0 bgt s3, a1, label180 mv a3, a1 mv a4, a2 diff --git a/tests/SysY2022/hidden_functional/22_matrix_multiply.riscv.s b/tests/SysY2022/hidden_functional/22_matrix_multiply.riscv.s index 502c5ae61..c86341cd6 100644 --- a/tests/SysY2022/hidden_functional/22_matrix_multiply.riscv.s +++ b/tests/SysY2022/hidden_functional/22_matrix_multiply.riscv.s @@ -14,278 +14,265 @@ res: .p2align 2 .globl main main: - # stack usage: CalleeArg[0] Local[0] RegSpill[16] CalleeSaved[104] - addi sp, sp, -120 + # stack usage: CalleeArg[0] Local[0] RegSpill[8] CalleeSaved[104] + addi sp, sp, -112 sd ra, 0(sp) sd s2, 8(sp) - sd s6, 16(sp) - sd s1, 24(sp) - sd s0, 32(sp) - sd s5, 40(sp) - sd s4, 48(sp) - sd s8, 56(sp) - sd s3, 64(sp) - sd s7, 72(sp) + sd s0, 16(sp) + sd s5, 24(sp) + sd s7, 32(sp) + sd s3, 40(sp) + sd s6, 48(sp) + sd s1, 56(sp) + sd s4, 64(sp) + sd s8, 72(sp) sd s9, 80(sp) sd s10, 88(sp) sd s11, 96(sp) jal getint sd a0, 104(sp) jal getint - li s4, 3 - li s1, 400 -pcrel285: - auipc a1, %pcrel_hi(b) - addiw s2, a0, -3 - sd a0, 112(sp) - mv t2, a0 - addi s0, a1, %pcrel_lo(pcrel285) -pcrel286: - auipc a0, %pcrel_hi(a) - addi s6, a0, %pcrel_lo(pcrel286) -pcrel287: - auipc a0, %pcrel_hi(res) - addi s5, a0, %pcrel_lo(pcrel287) + li s5, 3 + addiw s0, a0, -3 + li s4, 400 +pcrel277: + auipc a1, %pcrel_hi(a) + mv s2, a0 + addi s7, a1, %pcrel_lo(pcrel277) +pcrel278: + auipc a0, %pcrel_hi(b) +pcrel279: + auipc a1, %pcrel_hi(res) + addi s3, a0, %pcrel_lo(pcrel278) + addi s6, a1, %pcrel_lo(pcrel279) ld a0, 104(sp) - ble a0, zero, label2 - mv s3, s6 - mv s7, zero - ld t2, 112(sp) - bgt t2, zero, label62 - j label67 + ble a0, zero, label13 + mv s1, s7 + mv s8, zero + bgt s2, zero, label8 + j label6 .p2align 2 -label66: - addi s8, s8, 4 - mv s9, a1 +label12: + addi s9, s9, 4 .p2align 2 -label63: +label9: jal getint - addiw a1, s9, 1 - sw a0, 0(s8) - ld t2, 112(sp) - bgt t2, a1, label66 - addiw s7, s7, 1 + addiw s10, s10, 1 + sw a0, 0(s9) + bgt s2, s10, label12 + addiw s8, s8, 1 ld a0, 104(sp) - ble a0, s7, label2 + ble a0, s8, label13 .p2align 2 -label68: - addi s3, s3, 400 - ld t2, 112(sp) - ble t2, zero, label67 +label7: + addi s1, s1, 400 + ble s2, zero, label6 .p2align 2 -label62: - mv s8, s3 - mv s9, zero - j label63 -label2: +label8: + mv s9, s1 + mv s10, zero + j label9 +label13: jal getint - mv s8, a0 + mv s9, a0 jal getint - mv s3, a0 - ble s8, zero, label3 - mv s7, s0 - mv s9, zero - bgt a0, zero, label51 - j label56 -.p2align 2 -label192: - addiw s9, s9, 1 - ble s8, s9, label3 + mv s1, a0 + ble s9, zero, label22 + mv s8, zero + j label14 .p2align 2 -label57: - addi s7, s7, 400 - ble s3, zero, label56 -.p2align 2 -label51: - mv s10, s7 - mv s11, zero +label20: + addi s10, s10, 400 .p2align 2 -label52: +label17: jal getint addiw s11, s11, 1 - sw a0, 0(s10) - ble s3, s11, label192 - addi s10, s10, 4 - j label52 -label3: + sh2add a1, s8, s10 + sw a0, 0(a1) + bgt s1, s11, label20 + addiw s8, s8, 1 + ble s9, s8, label22 +.p2align 2 +label14: + ble s1, zero, label21 + mv s10, s3 + mv s11, zero + j label17 +label22: ld a0, 104(sp) - ble a0, zero, label46 - mv a2, s5 - mv a3, zero - mv a0, zero + ble a0, zero, label65 mv a1, s6 - bgt s3, zero, label8 - j label33 + mv a2, zero + mv a3, zero + mv a0, s7 + bgt s1, zero, label27 + j label52 .p2align 2 -label251: - addiw a3, a3, 1 +label250: + addiw a2, a2, 1 ld a0, 104(sp) - ble a0, a3, label248 + ble a0, a2, label245 .p2align 2 -label45: - addi a2, a2, 400 - mul a0, a3, s1 - add a1, s6, a0 - ble s3, zero, label33 +label53: + addi a1, a1, 400 + mul a3, a2, s4 + add a0, s7, a3 + ble s1, zero, label52 .p2align 2 -label8: - mv a4, a2 - mv a0, zero - ld t2, 112(sp) - bgt t2, zero, label14 - li a0, 1 - bgt s3, a0, label13 - j label103 +label27: + mv a3, a1 + mv a4, zero + bgt s2, zero, label33 + li a4, 1 + bgt s1, a4, label32 + j label133 .p2align 2 -label128: - sw t1, 0(a4) - addiw a0, a0, 1 - ble s3, a0, label251 +label51: + addi t0, t0, 4 .p2align 2 -label13: - addi a4, a4, 4 - ld t2, 112(sp) - ble t2, zero, label249 +label47: + sh2add t3, t1, a5 + lw t4, 0(t0) + addiw t1, t1, 1 + lw t6, 0(t3) + mulw t5, t4, t6 + addw t2, t2, t5 + bgt s2, t1, label51 + sw t2, 0(a3) + addiw a4, a4, 1 + ble s1, a4, label250 .p2align 2 -label14: - lw t1, 0(a4) - ld t2, 112(sp) - ble t2, s4, label109 - mv a5, a1 - mv t0, zero +label32: + addi a3, a3, 4 + ble s2, zero, label246 .p2align 2 -label28: - mul t4, t0, s1 - lw t3, 0(a5) - addiw t0, t0, 4 - add t5, s0, t4 - sh2add t2, a0, t5 - lw t4, 0(t2) - lw t6, 4(a5) - lw t5, 400(t2) - mulw a7, t3, t4 - mulw a6, t6, t5 - lw t6, 8(a5) - addw t5, a6, a7 - lw t4, 800(t2) - mulw a6, t6, t4 - lw t4, 12(a5) - addw t3, t5, a6 - lw t6, 1200(t2) - mulw t5, t4, t6 - addw a6, t3, t5 - addw t1, t1, a6 - ble s2, t0, label160 - addi a5, a5, 16 - j label28 +label33: + mul t0, a4, s4 + lw t2, 0(a3) + add a5, s3, t0 + ble s2, s5, label141 + mv t0, a0 + mv t1, zero .p2align 2 -label160: - mv t3, t1 - ld t2, 112(sp) - ble t2, t0, label252 +label35: + sh2add t3, t1, a5 + lw t4, 0(t0) + addiw t1, t1, 4 + lw t5, 0(t3) + lw a6, 4(t0) + lw t6, 4(t3) + mulw s8, t4, t5 + mulw a7, a6, t6 + lw a6, 8(t0) + addw t6, a7, s8 + lw t5, 8(t3) + mulw a7, a6, t5 + lw t5, 12(t0) + addw t4, t6, a7 + lw a7, 12(t3) + mulw t6, t5, a7 + addw a6, t4, t6 + addw t2, t2, a6 + ble s0, t1, label170 + addi t0, t0, 16 + j label35 .p2align 2 -label21: - mul t2, t0, s1 - mv t1, t3 - add a5, s0, t2 +label170: + mv t3, t2 + ble s2, t1, label248 .p2align 2 -label22: - sh2add t3, t0, a1 - sh2add t4, a0, a5 - addiw t0, t0, 1 - lw t2, 0(t3) - lw t5, 0(t4) - mulw t3, t2, t5 - ld t2, 112(sp) - addw t1, t1, t3 - ble t2, t0, label128 - addi a5, a5, 400 - j label22 +label46: + sh2add t0, t1, a0 + mv t2, t3 + j label47 .p2align 2 -label109: - mv t0, zero - mv t3, t1 +label141: mv t1, zero - ld t2, 112(sp) - bgt t2, zero, label21 - sw zero, 0(a4) - addiw a0, a0, 1 - bgt s3, a0, label13 - addiw a3, a3, 1 + mv t3, t2 + mv t2, zero + bgt s2, zero, label46 + sw zero, 0(a3) + addiw a4, a4, 1 + bgt s1, a4, label32 + addiw a2, a2, 1 ld a0, 104(sp) - bgt a0, a3, label45 - j label248 + bgt a0, a2, label53 + j label245 .p2align 2 -label252: - sw t1, 0(a4) - addiw a0, a0, 1 - bgt s3, a0, label13 - addiw a3, a3, 1 - ld a0, 104(sp) - bgt a0, a3, label45 label248: + sw t2, 0(a3) + addiw a4, a4, 1 + bgt s1, a4, label32 + addiw a2, a2, 1 + ld a0, 104(sp) + bgt a0, a2, label53 + j label245 +label21: + addiw s8, s8, 1 + bgt s9, s8, label14 + j label22 +label245: mv s0, zero - bgt s3, zero, label38 + bgt s1, zero, label60 + j label58 +.p2align 2 +label64: + addi s2, s2, 4 .p2align 2 -label43: +label61: + lw a0, 0(s2) + jal putint + li a0, 32 + jal putch + addiw s3, s3, 1 + bgt s1, s3, label64 +.p2align 2 +label58: li a0, 10 jal putch ld a0, 104(sp) addiw s0, s0, 1 - ble a0, s0, label46 - addi s5, s5, 400 - ble s3, zero, label43 -.p2align 2 -label38: - mv s1, s5 - mv s2, zero + ble a0, s0, label65 + addi s6, s6, 400 + ble s1, zero, label58 .p2align 2 -label39: - lw a0, 0(s1) - jal putint - li a0, 32 - jal putch - addiw s2, s2, 1 - ble s3, s2, label43 - addi s1, s1, 4 - j label39 -label46: +label60: + mv s2, s6 + mv s3, zero + j label61 +label65: mv a0, zero ld ra, 0(sp) ld s2, 8(sp) - ld s6, 16(sp) - ld s1, 24(sp) - ld s0, 32(sp) - ld s5, 40(sp) - ld s4, 48(sp) - ld s8, 56(sp) - ld s3, 64(sp) - ld s7, 72(sp) + ld s0, 16(sp) + ld s5, 24(sp) + ld s7, 32(sp) + ld s3, 40(sp) + ld s6, 48(sp) + ld s1, 56(sp) + ld s4, 64(sp) + ld s8, 72(sp) ld s9, 80(sp) ld s10, 88(sp) ld s11, 96(sp) - addi sp, sp, 120 + addi sp, sp, 112 ret -label67: - addiw s7, s7, 1 +label6: + addiw s8, s8, 1 ld a0, 104(sp) - bgt a0, s7, label68 - j label2 -label56: - addiw s9, s9, 1 - bgt s8, s9, label57 - j label3 -label33: - addiw a3, a3, 1 + bgt a0, s8, label7 + j label13 +label52: + addiw a2, a2, 1 ld a0, 104(sp) - bgt a0, a3, label45 - j label248 + bgt a0, a2, label53 + j label245 .p2align 2 -label249: - addiw a0, a0, 1 - bgt s3, a0, label13 -label103: - addiw a3, a3, 1 +label246: + addiw a4, a4, 1 + bgt s1, a4, label32 +label133: + addiw a2, a2, 1 ld a0, 104(sp) - bgt a0, a3, label45 - j label248 + bgt a0, a2, label53 + j label245 diff --git a/tests/SysY2022/hidden_functional/22_matrix_multiply.sy.ir b/tests/SysY2022/hidden_functional/22_matrix_multiply.sy.ir index 0e1dc8c9e..a84ea8634 100644 --- a/tests/SysY2022/hidden_functional/22_matrix_multiply.sy.ir +++ b/tests/SysY2022/hidden_functional/22_matrix_multiply.sy.ir @@ -2,7 +2,7 @@ internal func @getint() -> i32 { NoMemoryRead NoMemoryWrite }; internal func @putint(i32) -> void { NoMemoryRead NoMemoryWrite }; internal func @putch(i32) -> void { NoMemoryRead NoMemoryWrite }; internal [100 * [100 * i32]]* @a, align 8 { Flexible }; -internal [100 * [100 * i32]]* @b, align 8 { Flexible }; +internal [100 * [100 * i32]]* @b, align 8 { Flexible Transposed }; internal [100 * [100 * i32]]* @res, align 8 { Flexible }; func @main() -> i32 { NoRecurse Entry } { ^entry: @@ -33,128 +33,122 @@ func @main() -> i32 { NoRecurse Entry } { i1 %16 = icmp sgt i32 %0, i32 %15; cbr i1 %16(prob = 0.984615), ^while.body, ^b; ^while.body1: - i32 %17 = phi [^b, i32 0] [^b3, i32 %24]; - cbr i1 %13(prob = 0.984615), ^indirect1, ^b3; + i32 %17 = phi [^b, i32 0] [^b3, i32 %29]; + cbr i1 %13(prob = 0.984615), ^while.body3, ^b3; ^b2: - cbr i1 %1(prob = 0.984615), ^while.body3, ^b4; - ^indirect1: - [100 * i32]* %18 = getelementptr &([100 * [100 * i32]]* %7)[i64 0][i32 %17]; - ubr ^while.body4; + cbr i1 %1(prob = 0.984615), ^while.body4, ^b4; ^while.body2: - i32 %19 = phi [^indirect, i32 0] [^while.body2, i32 %22]; - i32 %20 = call () -> i32 @getint(); - i32* %21 = getelementptr &([100 * i32]* %14)[i64 0][i32 %19]; - store i32* %21 with i32 %20; - i32 %22 = add i32 %19, i32 1; - i1 %23 = icmp sgt i32 %2, i32 %22; - cbr i1 %23(prob = 0.984615), ^while.body2, ^b1; - ^b3: - i32 %24 = add i32 %17, i32 1; - i1 %25 = icmp sgt i32 %10, i32 %24; - cbr i1 %25(prob = 0.984615), ^while.body1, ^b2; + i32 %18 = phi [^indirect, i32 0] [^while.body2, i32 %21]; + i32 %19 = call () -> i32 @getint(); + i32* %20 = getelementptr &([100 * i32]* %14)[i64 0][i32 %18]; + store i32* %20 with i32 %19; + i32 %21 = add i32 %18, i32 1; + i1 %22 = icmp sgt i32 %2, i32 %21; + cbr i1 %22(prob = 0.984615), ^while.body2, ^b1; ^while.body3: - i32 %26 = phi [^b2, i32 0] [^b5, i32 %35]; - [100 * i32]* %27 = getelementptr &([100 * [100 * i32]]* %8)[i64 0][i32 %26]; - [100 * i32]* %28 = getelementptr &([100 * [100 * i32]]* %6)[i64 0][i32 %26]; + i32 %23 = phi [^while.body1, i32 0] [^while.body3, i32 %27]; + i32 %24 = call () -> i32 @getint(); + [100 * i32]* %25 = getelementptr &([100 * [100 * i32]]* %7)[i64 0][i32 %23]; + i32* %26 = getelementptr &([100 * i32]* %25)[i64 0][i32 %17]; + store i32* %26 with i32 %24; + i32 %27 = add i32 %23, i32 1; + i1 %28 = icmp sgt i32 %12, i32 %27; + cbr i1 %28(prob = 0.984615), ^while.body3, ^b3; + ^b3: + i32 %29 = add i32 %17, i32 1; + i1 %30 = icmp sgt i32 %10, i32 %29; + cbr i1 %30(prob = 0.984615), ^while.body1, ^b2; + ^while.body4: + i32 %31 = phi [^b2, i32 0] [^b5, i32 %35]; + [100 * i32]* %32 = getelementptr &([100 * [100 * i32]]* %8)[i64 0][i32 %31]; + [100 * i32]* %33 = getelementptr &([100 * [100 * i32]]* %6)[i64 0][i32 %31]; cbr i1 %13(prob = 0.984615), ^while.body5, ^b5; ^b4: ret i32 0; - ^while.body4: - i32 %29 = phi [^indirect1, i32 0] [^while.body4, i32 %32]; - i32 %30 = call () -> i32 @getint(); - i32* %31 = getelementptr &([100 * i32]* %18)[i64 0][i32 %29]; - store i32* %31 with i32 %30; - i32 %32 = add i32 %29, i32 1; - i1 %33 = icmp sgt i32 %12, i32 %32; - cbr i1 %33(prob = 0.984615), ^while.body4, ^b3; ^while.body5: - i32 %34 = phi [^while.body3, i32 0] [^b6, i32 %39]; - cbr i1 %3(prob = 0.984615), ^indirect2, ^b6; + i32 %34 = phi [^while.body4, i32 0] [^b6, i32 %40]; + cbr i1 %3(prob = 0.984615), ^indirect1, ^b6; ^b5: - i32 %35 = add i32 %26, i32 1; + i32 %35 = add i32 %31, i32 1; i1 %36 = icmp sgt i32 %0, i32 %35; - cbr i1 %36(prob = 0.984615), ^while.body3, ^while.body6; - ^indirect2: - i32* %37 = getelementptr &([100 * i32]* %27)[i64 0][i32 %34]; + cbr i1 %36(prob = 0.984615), ^while.body4, ^while.body6; + ^indirect1: + i32* %37 = getelementptr &([100 * i32]* %32)[i64 0][i32 %34]; i32 %38 = load i32* %37; + [100 * i32]* %39 = getelementptr &([100 * [100 * i32]]* %7)[i64 0][i32 %34]; cbr i1 %4(prob = 0.941176), ^while.body7, ^scalar.header; ^b6: - i32 %39 = add i32 %34, i32 1; - i1 %40 = icmp sgt i32 %12, i32 %39; - cbr i1 %40(prob = 0.984615), ^while.body5, ^b5; + i32 %40 = add i32 %34, i32 1; + i1 %41 = icmp sgt i32 %12, i32 %40; + cbr i1 %41(prob = 0.984615), ^while.body5, ^b5; ^while.body6: - i32 %41 = phi [^b5, i32 0] [^b7, i32 %84]; - [100 * i32]* %42 = getelementptr &([100 * [100 * i32]]* %8)[i64 0][i32 %41]; + i32 %42 = phi [^b5, i32 0] [^b7, i32 %81]; + [100 * i32]* %43 = getelementptr &([100 * [100 * i32]]* %8)[i64 0][i32 %42]; cbr i1 %13(prob = 0.984615), ^while.body8, ^b7; ^while.body7: - i32 %43 = phi [^indirect2, i32 0] [^while.body7, i32 %73]; - i32 %44 = phi [^indirect2, i32 %38] [^while.body7, i32 %72]; - [100 * i32]* %45 = getelementptr &([100 * [100 * i32]]* %7)[i64 0][i32 %43]; - i32* %46 = getelementptr &([100 * i32]* %45)[i64 0][i32 %34]; - i32* %47 = getelementptr &([100 * i32]* %28)[i64 0][i32 %43]; - i32 %48 = load i32* %47; - i32 %49 = load i32* %46; - [100 * i32]* %50 = getelementptr &([100 * i32]* %45)[i64 1]; - i32* %51 = getelementptr &([100 * i32]* %50)[i64 0][i32 %34]; - i32* %52 = getelementptr &(i32* %47)[i64 1]; + i32 %44 = phi [^indirect1, i32 0] [^while.body7, i32 %70]; + i32 %45 = phi [^indirect1, i32 %38] [^while.body7, i32 %69]; + i32* %46 = getelementptr &([100 * i32]* %33)[i64 0][i32 %44]; + i32 %47 = load i32* %46; + i32* %48 = getelementptr &([100 * i32]* %39)[i64 0][i32 %44]; + i32 %49 = load i32* %48; + i32* %50 = getelementptr &(i32* %46)[i64 1]; + i32 %51 = load i32* %50; + i32* %52 = getelementptr &(i32* %48)[i64 1]; i32 %53 = load i32* %52; - i32 %54 = load i32* %51; - i32 %55 = mul i32 %53, i32 %54; - i32 %56 = mul i32 %48, i32 %49; - i32 %57 = add i32 %55, i32 %56; - [100 * i32]* %58 = getelementptr &([100 * i32]* %45)[i64 2]; - i32* %59 = getelementptr &([100 * i32]* %58)[i64 0][i32 %34]; - i32* %60 = getelementptr &(i32* %47)[i64 2]; - i32 %61 = load i32* %60; - i32 %62 = load i32* %59; - i32 %63 = mul i32 %61, i32 %62; - i32 %64 = add i32 %57, i32 %63; - [100 * i32]* %65 = getelementptr &([100 * i32]* %45)[i64 3]; - i32* %66 = getelementptr &([100 * i32]* %65)[i64 0][i32 %34]; - i32* %67 = getelementptr &(i32* %47)[i64 3]; - i32 %68 = load i32* %67; - i32 %69 = load i32* %66; - i32 %70 = mul i32 %68, i32 %69; - i32 %71 = add i32 %64, i32 %70; - i32 %72 = add i32 %44, i32 %71; - i32 %73 = add i32 %43, i32 4; - i1 %74 = icmp sgt i32 %5, i32 %73; - cbr i1 %74(prob = 0.941176), ^while.body7, ^scalar.header; + i32 %54 = mul i32 %51, i32 %53; + i32 %55 = mul i32 %47, i32 %49; + i32 %56 = add i32 %54, i32 %55; + i32* %57 = getelementptr &(i32* %46)[i64 2]; + i32 %58 = load i32* %57; + i32* %59 = getelementptr &(i32* %48)[i64 2]; + i32 %60 = load i32* %59; + i32 %61 = mul i32 %58, i32 %60; + i32 %62 = add i32 %56, i32 %61; + i32* %63 = getelementptr &(i32* %46)[i64 3]; + i32 %64 = load i32* %63; + i32* %65 = getelementptr &(i32* %48)[i64 3]; + i32 %66 = load i32* %65; + i32 %67 = mul i32 %64, i32 %66; + i32 %68 = add i32 %62, i32 %67; + i32 %69 = add i32 %45, i32 %68; + i32 %70 = add i32 %44, i32 4; + i1 %71 = icmp sgt i32 %5, i32 %70; + cbr i1 %71(prob = 0.941176), ^while.body7, ^scalar.header; ^scalar.header: - i32 %75 = phi [^indirect2, i32 0] [^while.body7, i32 %73]; - i32 %76 = phi [^indirect2, i32 %38] [^while.body7, i32 %72]; - i32 %77 = phi [^indirect2, i32 undef] [^while.body7, i32 %72]; - i1 %78 = icmp sgt i32 %2, i32 %75; - cbr i1 %78(prob = 0.75), ^while.body9, ^scalar.final; + i32 %72 = phi [^indirect1, i32 0] [^while.body7, i32 %70]; + i32 %73 = phi [^indirect1, i32 %38] [^while.body7, i32 %69]; + i32 %74 = phi [^indirect1, i32 undef] [^while.body7, i32 %69]; + i1 %75 = icmp sgt i32 %2, i32 %72; + cbr i1 %75(prob = 0.75), ^while.body9, ^scalar.final; ^while.body8: - i32 %79 = phi [^while.body6, i32 0] [^while.body8, i32 %82]; - i32* %80 = getelementptr &([100 * i32]* %42)[i64 0][i32 %79]; - i32 %81 = load i32* %80; - call (i32) -> void @putint(i32 %81); + i32 %76 = phi [^while.body6, i32 0] [^while.body8, i32 %79]; + i32* %77 = getelementptr &([100 * i32]* %43)[i64 0][i32 %76]; + i32 %78 = load i32* %77; + call (i32) -> void @putint(i32 %78); call (i32) -> void @putch(i32 32); - i32 %82 = add i32 %79, i32 1; - i1 %83 = icmp sgt i32 %12, i32 %82; - cbr i1 %83(prob = 0.984615), ^while.body8, ^b7; + i32 %79 = add i32 %76, i32 1; + i1 %80 = icmp sgt i32 %12, i32 %79; + cbr i1 %80(prob = 0.984615), ^while.body8, ^b7; ^b7: call (i32) -> void @putch(i32 10); - i32 %84 = add i32 %41, i32 1; - i1 %85 = icmp sgt i32 %0, i32 %84; - cbr i1 %85(prob = 0.984615), ^while.body6, ^b4; + i32 %81 = add i32 %42, i32 1; + i1 %82 = icmp sgt i32 %0, i32 %81; + cbr i1 %82(prob = 0.984615), ^while.body6, ^b4; ^scalar.final: - i32 %86 = phi [^scalar.header, i32 %77] [^while.body9, i32 %95]; - store i32* %37 with i32 %86; + i32 %83 = phi [^scalar.header, i32 %74] [^while.body9, i32 %91]; + store i32* %37 with i32 %83; ubr ^b6; ^while.body9 {scalar}: - i32 %87 = phi [^scalar.header, i32 %76] [^while.body9, i32 %95]; - i32 %88 = phi [^scalar.header, i32 %75] [^while.body9, i32 %96]; - [100 * i32]* %89 = getelementptr &([100 * [100 * i32]]* %7)[i64 0][i32 %88]; - i32* %90 = getelementptr &([100 * i32]* %89)[i64 0][i32 %34]; - i32* %91 = getelementptr &([100 * i32]* %28)[i64 0][i32 %88]; - i32 %92 = load i32* %91; - i32 %93 = load i32* %90; - i32 %94 = mul i32 %92, i32 %93; - i32 %95 = add i32 %87, i32 %94; - i32 %96 = add i32 %88, i32 1; - i1 %97 = icmp sgt i32 %2, i32 %96; - cbr i1 %97(prob = 0.75), ^while.body9, ^scalar.final; + i32 %84 = phi [^scalar.header, i32 %73] [^while.body9, i32 %91]; + i32 %85 = phi [^scalar.header, i32 %72] [^while.body9, i32 %92]; + i32* %86 = getelementptr &([100 * i32]* %33)[i64 0][i32 %85]; + i32 %87 = load i32* %86; + i32* %88 = getelementptr &([100 * i32]* %39)[i64 0][i32 %85]; + i32 %89 = load i32* %88; + i32 %90 = mul i32 %87, i32 %89; + i32 %91 = add i32 %84, i32 %90; + i32 %92 = add i32 %85, i32 1; + i1 %93 = icmp sgt i32 %2, i32 %92; + cbr i1 %93(prob = 0.75), ^while.body9, ^scalar.final; } diff --git a/tests/SysY2022/hidden_functional/23_json.riscv.s b/tests/SysY2022/hidden_functional/23_json.riscv.s index ab2e62a79..230efb1b5 100644 --- a/tests/SysY2022/hidden_functional/23_json.riscv.s +++ b/tests/SysY2022/hidden_functional/23_json.riscv.s @@ -57,16 +57,16 @@ label27: lw a0, 0(s1) sh2add a2, a0, s0 lw a1, 0(a2) + xori t1, a1, 10 xori a4, a1, 9 xori t0, a1, 32 sltiu a5, a4, 1 sltiu a3, t0, 1 - xori t0, a1, 10 + sltiu a4, t1, 1 or a2, a3, a5 - sltiu a4, t0, 1 xori a5, a1, 13 - sltiu t1, a5, 1 - or a3, a4, t1 + sltiu t0, a5, 1 + or a3, a4, t0 or a1, a2, a3 beq a1, zero, label30 addiw a0, a0, 1 @@ -83,9 +83,9 @@ label179: sltiu a4, t1, 1 or a3, a5, t0 xori a5, a0, 13 - sltiu t0, a5, 1 - or t1, a4, t0 - or a0, a3, t1 + sltiu t2, a5, 1 + or t0, a4, t2 + or a0, a3, t0 beq a0, zero, label11 addiw a0, a2, 1 sw a0, 0(s1) @@ -128,14 +128,14 @@ label123: lw a1, 0(s1) sh2add a2, a1, s0 lw a0, 0(a2) - xori t1, a0, 10 xori a4, a0, 9 - xori t0, a0, 32 - sltiu a5, a4, 1 - sltiu a3, t0, 1 - sltiu a4, t1, 1 + xori a5, a0, 32 + sltiu t0, a4, 1 + sltiu a3, a5, 1 + xori a5, a0, 10 + or a2, a3, t0 + sltiu a4, a5, 1 xori t0, a0, 13 - or a2, a3, a5 sltiu a3, t0, 1 or a5, a4, a3 or a0, a2, a5 @@ -147,17 +147,17 @@ label176: lw a0, 0(s1) sh2add a4, a0, s0 lw a1, 0(a4) - xori t0, a1, 13 - xori t1, a1, 10 + xori t0, a1, 10 xori a4, a1, 9 xori a2, a1, 32 sltiu a5, a4, 1 sltiu a3, a2, 1 - sltiu a4, t1, 1 + sltiu a4, t0, 1 or a2, a3, a5 - sltiu a3, t0, 1 - or a5, a4, a3 - or a1, a2, a5 + xori a5, a1, 13 + sltiu a3, a5, 1 + or t0, a4, a3 + or a1, a2, t0 beq a1, zero, label119 addiw a0, a0, 1 j label117 @@ -193,7 +193,6 @@ label39: lw a0, 0(s1) sh2add a4, a0, s0 lw a1, 0(a4) - xori t0, a1, 13 xori t1, a1, 10 xori a4, a1, 9 xori a2, a1, 32 @@ -201,8 +200,9 @@ label39: sltiu a3, a2, 1 sltiu a4, t1, 1 or a2, a3, a5 - sltiu a5, t0, 1 - or a3, a4, a5 + xori a5, a1, 13 + sltiu t0, a5, 1 + or a3, a4, t0 or a1, a2, a3 bne a1, zero, label39 label42: @@ -215,13 +215,13 @@ label42: j label188 label64: lw a1, 0(s1) - sh2add a4, a1, s0 - lw a0, 0(a4) - xori a4, a0, 45 - xori a3, a0, 43 - sltiu a5, a4, 1 - sltiu a2, a3, 1 - or a3, a2, a5 + sh2add a3, a1, s0 + lw a0, 0(a3) + xori a5, a0, 45 + xori a4, a0, 43 + sltiu t0, a5, 1 + sltiu a2, a4, 1 + or a3, a2, t0 beq a3, zero, label65 addiw a1, a1, 1 sw a1, 0(s1) @@ -247,17 +247,17 @@ label70: li a3, 46 sh2add a2, a0, s0 lw a1, 0(a2) - bne a1, a3, label77 -label74: + bne a1, a3, label73 +label90: addiw a0, a0, 1 sw a0, 0(s1) - ble s2, a0, label77 - sh2add a2, a0, s0 + ble s2, a0, label73 + sh2add a3, a0, s0 li a4, 10 - lw a3, 0(a2) - addiw a1, a3, -48 - bltu a1, a4, label74 - j label77 + lw a2, 0(a3) + addiw a1, a2, -48 + bltu a1, a4, label90 + j label73 label55: lw a0, 0(s1) sh2add a3, a0, s0 @@ -300,8 +300,17 @@ label99: li a0, 91 beq a1, a0, label555 li a0, 34 - bne a1, a0, label102 - li a1, 2 + beq a1, a0, label559 + xori a0, a1, 43 + xori a4, a1, 45 + addiw a3, a1, -48 + sltiu a2, a0, 1 + sltiu a5, a4, 1 + sltiu a4, a3, 10 + or a0, a2, a5 + or a2, a0, a4 + beq a2, zero, label103 + li a1, 1 lw a0, 0(s1) j label4 label19: @@ -381,18 +390,18 @@ label129: sw a0, 0(s1) ble s2, a0, label133 lw a0, 0(s1) - sh2add a4, a0, s0 - lw a1, 0(a4) - xori t0, a1, 13 - xori t1, a1, 10 + sh2add a2, a0, s0 + lw a1, 0(a2) + xori t1, a1, 13 xori a4, a1, 9 - xori a2, a1, 32 - sltiu a5, a4, 1 - sltiu a3, a2, 1 - sltiu a4, t1, 1 - or a2, a3, a5 - sltiu a5, t0, 1 - or a3, a4, a5 + xori a5, a1, 32 + sltiu t0, a4, 1 + sltiu a3, a5, 1 + xori a5, a1, 10 + or a2, a3, t0 + sltiu a4, a5, 1 + sltiu t0, t1, 1 + or a3, a4, t0 or a1, a2, a3 beq a1, zero, label133 addiw a0, a0, 1 @@ -407,24 +416,11 @@ label94: addiw a1, a0, 1 sw a1, 0(s1) j label68 -label102: - xori a0, a1, 43 - xori a5, a1, 45 - addiw a3, a1, -48 - sltiu a2, a0, 1 - sltiu a4, a5, 1 - sltiu a5, a3, 10 - or a0, a2, a4 - or a2, a0, a5 - beq a2, zero, label103 - li a1, 1 - lw a0, 0(s1) - j label4 label72: lw a0, 0(s1) -label77: - bgt s2, a0, label79 -label480: +label73: + bgt s2, a0, label75 +label459: li a0, 1 j label7 label133: @@ -447,7 +443,22 @@ label140: label142: addiw a0, a0, 1 sw a0, 0(s1) - bgt s2, a0, label161 + ble s2, a0, label145 + lw a0, 0(s1) + sh2add a4, a0, s0 + lw a1, 0(a4) + xori t1, a1, 10 + xori a4, a1, 9 + xori a2, a1, 32 + sltiu a5, a4, 1 + sltiu a3, a2, 1 + sltiu a4, t1, 1 + or a2, a3, a5 + xori a5, a1, 13 + sltiu t0, a5, 1 + or a3, a4, t0 + or a1, a2, a3 + bne a1, zero, label142 label145: li a0, 2 mv a1, s0 @@ -457,56 +468,60 @@ label145: beq a0, zero, label188 lw a0, 0(s1) label147: - ble s2, a0, label160 + bgt s2, a0, label149 lw a1, 0(s1) +label151: + ble s2, a1, label188 sh2add a3, a1, s0 - lw a0, 0(a3) - xori a5, a0, 10 - xori a3, a0, 9 - xori a2, a0, 32 - sltiu t0, a3, 1 - sltiu a4, a2, 1 - sltiu a3, a5, 1 - or a2, a4, t0 - xori a4, a0, 13 - sltiu t0, a4, 1 - or a5, a3, t0 - or a0, a2, a5 - beq a0, zero, label149 + li a4, 58 + lw a2, 0(a3) + bne a2, a4, label188 addiw a0, a1, 1 +label155: sw a0, 0(s1) - j label147 -label79: + ble s2, a0, label159 + lw a0, 0(s1) + sh2add a4, a0, s0 + lw a1, 0(a4) + xori t1, a1, 13 + xori t0, a1, 10 + xori a4, a1, 9 + xori a2, a1, 32 + sltiu a5, a4, 1 + sltiu a3, a2, 1 + sltiu a4, t0, 1 + or a2, a3, a5 + sltiu a5, t1, 1 + or a3, a4, a5 + or a1, a2, a3 + beq a1, zero, label159 + addiw a0, a0, 1 + j label155 +label75: lw a0, 0(s1) li a3, 101 sh2add a1, a0, s0 lw a2, 0(a1) - bne a2, a3, label480 + bne a2, a3, label459 addiw a0, a0, 1 sw a0, 0(s1) - bgt s2, a0, label81 -label84: - bgt s2, a0, label92 + bgt s2, a0, label77 +label80: + bgt s2, a0, label88 lw a0, 0(s1) - j label86 -label92: - lw a0, 0(s1) - li a3, 45 - sh2add a2, a0, s0 - lw a1, 0(a2) - bne a1, a3, label86 -label519: + j label82 +label498: addiw a0, a0, 1 sw a0, 0(s1) -label86: - ble s2, a0, label480 +label82: + ble s2, a0, label459 lw a0, 0(s1) li a4, 10 - sh2add a3, a0, s0 - lw a2, 0(a3) - addiw a1, a2, -48 - bgeu a1, a4, label480 - j label519 + sh2add a2, a0, s0 + lw a3, 0(a2) + addiw a1, a3, -48 + bgeu a1, a4, label459 + j label498 label137: lw a0, 0(s1) sh2add a3, a0, s0 @@ -518,63 +533,59 @@ label137: sltiu a4, a2, 1 sltiu a3, t0, 1 or a2, a4, a5 - xori a4, a1, 13 - sltiu a5, a4, 1 - or t0, a3, a5 + xori a5, a1, 13 + sltiu a4, a5, 1 + or t0, a3, a4 or a1, a2, t0 beq a1, zero, label140 addiw a0, a0, 1 sw a0, 0(s1) j label135 -label161: - lw a0, 0(s1) - sh2add a2, a0, s0 - lw a1, 0(a2) - xori t1, a1, 10 - xori t0, a1, 9 - xori a4, a1, 32 - sltiu a5, t0, 1 - sltiu a3, a4, 1 - sltiu a4, t1, 1 - or a2, a3, a5 - xori a5, a1, 13 - sltiu t0, a5, 1 - or a3, a4, t0 - or a1, a2, a3 - bne a1, zero, label142 - j label145 -label160: - lw a1, 0(s1) label149: - ble s2, a1, label188 + lw a1, 0(s1) sh2add a3, a1, s0 - li a4, 58 - lw a2, 0(a3) - bne a2, a4, label188 + lw a0, 0(a3) + xori a5, a0, 10 + xori a3, a0, 9 + xori a2, a0, 32 + sltiu t0, a3, 1 + sltiu a4, a2, 1 + sltiu a3, a5, 1 + or a2, a4, t0 + xori t0, a0, 13 + sltiu a4, t0, 1 + or a5, a3, a4 + or a0, a2, a5 + beq a0, zero, label151 addiw a0, a1, 1 -label153: sw a0, 0(s1) - bgt s2, a0, label155 -label157: - mv a0, zero - mv a1, s0 - mv a2, s2 - mv a3, s1 - jal detect_item - bne a0, zero, label667 - j label188 -label103: - li a0, 116 - bne a1, a0, label104 - li a1, 5 + j label147 +label88: lw a0, 0(s1) - j label4 -label81: + li a3, 45 + sh2add a2, a0, s0 + lw a1, 0(a2) + beq a1, a3, label498 + j label82 +label77: lw a0, 4(a1) li a2, 43 - beq a0, a2, label82 + bne a0, a2, label78 + lw a1, 0(s1) + addiw a0, a1, 1 + sw a0, 0(s1) + j label80 +label78: lw a0, 0(s1) - j label84 + j label80 +label103: + li a0, 116 + beq a1, a0, label572 + li a0, 102 + beq a1, a0, label576 + li a2, 110 + beq a1, a2, label106 + j label188 label304: lw a0, 0(s1) label46: @@ -606,61 +617,38 @@ label49: addiw a0, a0, 1 sw a0, 0(s1) j label46 -label104: - li a0, 102 - bne a1, a0, label105 - li a1, 6 - lw a0, 0(s1) - j label4 -label155: - lw a0, 0(s1) - sh2add a2, a0, s0 - lw a1, 0(a2) - xori a3, a1, 9 - xori t0, a1, 32 - sltiu a5, a3, 1 - sltiu a4, t0, 1 - xori t0, a1, 10 - or a2, a4, a5 - sltiu a3, t0, 1 - xori a5, a1, 13 - sltiu a4, a5, 1 - or t0, a3, a4 - or a1, a2, t0 - beq a1, zero, label157 - addiw a0, a0, 1 - j label153 -label105: - li a2, 110 - bne a1, a2, label188 - li a0, 7 +label159: + mv a0, zero mv a1, s0 mv a2, s2 mv a3, s1 jal detect_item - j label7 + bne a0, zero, label667 + j label188 label695: lw a0, 0(s1) label164: ble s2, a0, label166 lw a0, 0(s1) - sh2add a2, a0, s0 - lw a1, 0(a2) + sh2add a3, a0, s0 + lw a1, 0(a3) + xori t0, a1, 10 xori a3, a1, 9 - xori a5, a1, 32 - sltiu t0, a3, 1 + xori a2, a1, 32 + sltiu a5, a3, 1 + sltiu a4, a2, 1 + sltiu a3, t0, 1 + or a2, a4, a5 + xori a5, a1, 13 sltiu a4, a5, 1 - xori a5, a1, 10 - or a2, a4, t0 - sltiu a3, a5, 1 - xori a4, a1, 13 - sltiu t0, a4, 1 - or a5, a3, t0 - or a1, a2, a5 + or t0, a3, a4 + or a1, a2, t0 beq a1, zero, label169 addiw a0, a0, 1 sw a0, 0(s1) j label164 +label166: + lw a0, 0(s1) label169: ble s2, a0, label188 sh2add a2, a0, s0 @@ -668,24 +656,35 @@ label169: lw a1, 0(a2) bne a1, a3, label188 j label361 -label450: +label555: + li a1, 3 lw a0, 0(s1) - j label70 -label166: + j label4 +label559: + li a1, 2 + lw a0, 0(s1) + j label4 +label576: + li a1, 6 + lw a0, 0(s1) + j label4 +label572: + li a1, 5 lw a0, 0(s1) - j label169 + j label4 +label106: + li a0, 7 + mv a1, s0 + mv a2, s2 + mv a3, s1 + jal detect_item + j label7 label551: li a1, 4 j label4 -label555: - li a1, 3 +label450: lw a0, 0(s1) - j label4 -label82: - lw a1, 0(s1) - addiw a0, a1, 1 - sw a0, 0(s1) - j label84 + j label70 .p2align 2 .globl main main: @@ -701,39 +700,39 @@ main: sw zero, 48(sp) jal getch li s3, 35 -pcrel1213: +pcrel1208: auipc a1, %pcrel_hi(buffer) - addi s0, a1, %pcrel_lo(pcrel1213) - beq a0, s3, label1129 + addi s0, a1, %pcrel_lo(pcrel1208) + beq a0, s3, label1124 mv s2, s0 mv s4, zero - j label1100 + j label1095 .p2align 2 -label1105: +label1100: addi s2, s2, 4 mv s4, a1 .p2align 2 -label1100: +label1095: sw a0, 0(s2) jal getch addiw a1, s4, 1 - bne a0, s3, label1105 + bne a0, s3, label1100 lw a0, 48(sp) - mv t2, a1 -label1109: - bgt t2, a0, label1121 -label1111: + mv t3, a1 +label1104: + bgt t3, a0, label1116 +label1106: mv a0, zero mv a1, s0 - mv a2, t2 + mv a2, t3 mv a3, s1 jal detect_item mv a1, a0 lw a0, 48(sp) -label1112: - bgt t2, a0, label1114 -label1116: - beq a1, zero, label1117 +label1107: + bgt t3, a0, label1109 +label1111: + beq a1, zero, label1112 li a0, 111 jal putch li a0, 107 @@ -741,8 +740,8 @@ label1116: li a0, 10 jal putch mv a0, zero - j label1118 -label1121: + j label1113 +label1116: lw a1, 48(sp) sh2add a2, a1, s0 lw a0, 0(a2) @@ -757,11 +756,11 @@ label1121: sltiu a4, t0, 1 or a5, a3, a4 or a0, a2, a5 - beq a0, zero, label1111 + beq a0, zero, label1106 addiw a0, a1, 1 sw a0, 48(sp) - j label1109 -label1114: + j label1104 +label1109: lw a0, 48(sp) sh2add a3, a0, s0 lw a2, 0(a3) @@ -776,11 +775,11 @@ label1114: sltiu t1, a5, 1 or t0, a4, t1 or a2, a3, t0 - beq a2, zero, label1116 + beq a2, zero, label1111 addiw a0, a0, 1 sw a0, 48(sp) - j label1112 -label1117: + j label1107 +label1112: li a0, 110 jal putch li a0, 111 @@ -796,7 +795,7 @@ label1117: li a0, 10 jal putch li a0, 1 -label1118: +label1113: ld ra, 0(sp) ld s1, 8(sp) ld s0, 16(sp) @@ -805,7 +804,7 @@ label1118: ld s4, 40(sp) addi sp, sp, 56 ret -label1129: +label1124: mv a0, zero - mv t2, zero - j label1109 + mv t3, zero + j label1104 diff --git a/tests/SysY2022/hidden_functional/28_side_effect2.riscv.s b/tests/SysY2022/hidden_functional/28_side_effect2.riscv.s index 5e4d41fba..3bcf01877 100644 --- a/tests/SysY2022/hidden_functional/28_side_effect2.riscv.s +++ b/tests/SysY2022/hidden_functional/28_side_effect2.riscv.s @@ -26,23 +26,13 @@ main: sd zero, 80(sp) j label2 .p2align 2 -label622: - mv t2, zero - j label625 -.p2align 2 -label124: - addiw t1, a1, 3 - blt t0, a5, label631 - sw a0, 16(sp) - lw t2, 12(sp) - bne t2, zero, label126 -.p2align 2 -label210: +label206: + sw a0, 8(sp) + mv t2, a0 + bne a0, zero, label7 mv a1, t1 -.p2align 2 -label7: addiw t0, t0, 1 - bge t0, a3, label959 + bge t0, a3, label957 .p2align 2 label2: addiw t1, a1, 1 @@ -50,512 +40,530 @@ label2: mv t2, zero j label210 .p2align 2 -label121: +label7: addiw t1, a1, 2 - blt t0, a4, label622 - sw a0, 12(sp) - lw t2, 8(sp) - bne t2, zero, label124 + bge t0, a4, label215 + mv t2, zero mv a1, t1 addiw t0, t0, 1 blt t0, a3, label2 - j label959 + j label957 .p2align 2 -label206: - sw a0, 8(sp) - mv t2, a0 - bne a0, zero, label121 +label10: + addiw t1, a1, 3 + bge t0, a5, label224 mv a1, t1 addiw t0, t0, 1 blt t0, a3, label2 - j label959 -label126: + j label957 +label224: + sw a0, 16(sp) + lw t2, 12(sp) + beq t2, zero, label940 addiw t1, a1, 4 li t2, 4 - blt t0, t2, label210 + bge t0, t2, label13 +label940: + mv a1, t1 +label67: + addiw t0, t0, 1 + blt t0, a3, label2 + j label957 +.p2align 2 +label215: + sw a0, 12(sp) + lw t2, 8(sp) + bne t2, zero, label10 + mv a1, t1 + addiw t0, t0, 1 + blt t0, a3, label2 + j label957 +label13: sw a0, 20(sp) lw t2, 16(sp) - bne t2, zero, label128 - j label210 -label959: + bne t2, zero, label14 + j label940 +.p2align 2 +label210: + mv a1, t1 + addiw t0, t0, 1 + blt t0, a3, label2 +label957: mv t0, zero - j label9 -label66: + j label69 +label126: addiw t1, a1, 8 li t2, 4 - blt t0, t2, label225 - sw zero, 20(sp) - lw t2, 16(sp) - beq t2, zero, label68 + bge t0, t2, label127 .p2align 2 -label225: +label447: mv a1, t1 .p2align 2 -label14: +label74: addiw t0, t0, 1 - bge t0, a3, label229 + bge t0, a3, label451 .p2align 2 -label9: +label69: addiw t1, a1, 2 - ble t0, zero, label221 - sw zero, 8(sp) - mv t2, zero - addiw t1, a1, 4 - bge t0, a4, label956 -.p2align 2 -label399: - mv t2, a0 - bne a0, zero, label406 - addiw t2, a1, 6 - blt t0, a5, label411 -label940: - sw zero, 16(sp) - lw t1, 12(sp) - beq t1, zero, label66 -label418: - mv a1, t2 - j label14 -.p2align 2 -label221: + bgt t0, zero, label442 mv t2, a0 - bne a0, zero, label225 + bne a0, zero, label447 addiw t1, a1, 4 - blt t0, a4, label399 + blt t0, a4, label621 sw zero, 12(sp) lw t2, 8(sp) - bne t2, zero, label406 + bne t2, zero, label628 addiw t2, a1, 6 - blt t0, a5, label411 - j label940 + blt t0, a5, label633 + j label945 .p2align 2 -label956: - sw zero, 12(sp) - lw t2, 8(sp) - beq t2, zero, label960 +label442: + sw zero, 8(sp) + mv t2, zero + addiw t1, a1, 4 + bge t0, a4, label959 .p2align 2 -label406: +label621: + mv t2, a0 + beq a0, zero, label944 +.p2align 2 +label628: mv a1, t1 addiw t0, t0, 1 - blt t0, a3, label9 -label229: + blt t0, a3, label69 +label451: mv a4, a0 -label16: - bge a4, a3, label19 +label76: + bge a4, a3, label79 addiw a5, a4, -1 addiw a1, a1, 1 - ble a4, a5, label19 + ble a4, a5, label79 sh2add t0, a5, a2 sw a0, 0(t0) - beq a5, zero, label59 - lw a5, -4(t0) -label56: - beq a5, zero, label19 - addiw a4, a4, 1 - j label16 -label960: + bne a5, zero, label115 + lw a5, 8(sp) + j label116 +.p2align 2 +label959: + sw zero, 12(sp) + lw t2, 8(sp) + bne t2, zero, label628 addiw t2, a1, 6 - bge t0, a5, label940 -label411: + bge t0, a5, label945 +label633: mv a1, t2 addiw t0, t0, 1 - blt t0, a3, label9 - j label229 -label19: + blt t0, a3, label69 + j label451 +label944: + addiw t2, a1, 6 + blt t0, a5, label633 +label945: + sw zero, 16(sp) + lw t1, 12(sp) + beq t1, zero, label126 +label640: + mv a1, t2 + j label74 +label127: + sw zero, 20(sp) + lw t2, 16(sp) + bne t2, zero, label447 + addiw t2, a1, 10 + li t1, 5 + blt t0, t1, label640 + sw zero, 24(sp) + lw t1, 20(sp) + bne t1, zero, label640 + addiw t1, a1, 12 + li t2, 6 + blt t0, t2, label447 + sw zero, 28(sp) + lw t2, 24(sp) + bne t2, zero, label447 + addiw t2, a1, 14 + li t1, 7 + blt t0, t1, label640 + sw zero, 32(sp) + lw t1, 28(sp) + bne t1, zero, label640 + addiw t1, a1, 16 + li t2, 8 + blt t0, t2, label447 + sw zero, 36(sp) + lw t2, 32(sp) + bne t2, zero, label447 + addiw t2, a1, 18 + li t1, 9 + blt t0, t1, label640 + sw zero, 40(sp) + lw t1, 36(sp) + bne t1, zero, label640 + addiw t3, a1, 20 + li t2, 10 + bge t0, t2, label716 + mv t1, a0 + j label140 +label79: lw a3, 8(sp) addiw a2, a1, 3 - bne a3, zero, label50 + beq a3, zero, label462 + lw a4, 12(sp) + addiw a1, a1, 6 + bne a4, zero, label81 + j label111 +label462: mv a1, a2 -label51: +label111: lw a2, 16(sp) addiw a5, a1, 3 addiw a4, a1, 6 mv a1, a5 - beq a2, zero, label21 + beq a2, zero, label81 mv a1, a4 -label21: +label81: lw a4, 24(sp) addiw a2, a1, 3 - beq a4, zero, label248 + beq a4, zero, label470 lw a5, 28(sp) addiw a2, a1, 6 - beq a5, zero, label46 + beq a5, zero, label106 lw a5, 32(sp) addiw a2, a1, 9 - bne a5, zero, label46 - lw a5, 36(sp) - addiw a2, a1, 12 - beq a5, zero, label46 -label248: + beq a5, zero, label109 + j label106 +label470: mv a1, a2 -label23: +label83: lw a5, 44(sp) addiw a2, a1, 3 - beq a5, zero, label254 + beq a5, zero, label476 lw a2, 48(sp) addiw a1, a1, 6 - beq a2, zero, label25 - j label39 -label254: - mv a1, a2 -label39: - lw a5, 52(sp) - addiw a2, a1, 3 - beq a5, zero, label311 - lw a5, 56(sp) - addiw a2, a1, 6 - beq a5, zero, label311 - lw a5, 60(sp) - addiw a2, a1, 9 - bne a5, zero, label43 -label311: + beq a2, zero, label85 + j label99 +label533: mv a1, a2 -label25: +label85: addiw a2, a1, 3 - beq a3, zero, label258 - lw a3, 16(sp) - addiw a2, a1, 6 - bne a3, zero, label34 -label258: + bne a3, zero, label93 +label480: mv a1, a2 -label27: +label87: lw a3, 28(sp) addiw a2, a1, 3 - beq a3, zero, label29 -label266: + beq a3, zero, label89 +label488: mv a1, a2 -label36: +label96: addw a0, a1, a0 jal putint ld ra, 0(sp) mv a0, zero addi sp, sp, 88 ret -label29: - lw a3, 32(sp) +label476: + mv a1, a2 +label99: + lw a5, 52(sp) + addiw a2, a1, 3 + beq a5, zero, label533 + lw a5, 56(sp) addiw a2, a1, 6 - beq a3, zero, label30 - lw a3, 36(sp) + beq a5, zero, label533 + lw a5, 60(sp) addiw a2, a1, 9 - beq a3, zero, label266 - j label30 -label68: - addiw t2, a1, 10 - li t1, 5 - blt t0, t1, label418 - sw zero, 24(sp) - lw t1, 20(sp) - bne t1, zero, label418 - addiw t1, a1, 12 - li t2, 6 - blt t0, t2, label225 - sw zero, 28(sp) - lw t2, 24(sp) - bne t2, zero, label225 - addiw t2, a1, 14 - li t1, 7 - blt t0, t1, label418 - sw zero, 32(sp) - lw t1, 28(sp) - bne t1, zero, label418 - addiw t1, a1, 16 - li t2, 8 - blt t0, t2, label225 - sw zero, 36(sp) - lw t2, 32(sp) - bne t2, zero, label225 - addiw t2, a1, 18 - li t1, 9 - blt t0, t1, label418 - sw zero, 40(sp) - lw t1, 36(sp) - bne t1, zero, label418 - addiw t3, a1, 20 - li t2, 10 - bge t0, t2, label494 - mv t1, a0 - j label80 -label50: - lw a4, 12(sp) - addiw a1, a1, 6 - bne a4, zero, label21 - j label51 -label46: - addiw a1, a2, 3 - j label23 -label30: + beq a5, zero, label533 + lw a5, 64(sp) + addiw a2, a1, 12 + addiw t0, a1, 15 + mv a1, t0 + bne a5, zero, label85 + mv a1, a2 + j label85 +label89: + lw a3, 32(sp) + addiw a2, a1, 6 + bne a3, zero, label92 +label90: lw a3, 40(sp) addiw a1, a2, 3 sltu a0, zero, a3 - j label36 -label128: + j label96 +label115: + lw a5, -4(t0) +label116: + beq a5, zero, label79 + addiw a4, a4, 1 + j label76 +label93: + lw a3, 16(sp) + addiw a2, a1, 6 + bne a3, zero, label94 + j label480 +label106: + addiw a1, a2, 3 + j label83 +label14: addiw t1, a1, 5 li t2, 5 - blt t0, t2, label210 + blt t0, t2, label940 sw a0, 24(sp) lw t2, 20(sp) - beq t2, zero, label210 + bne t2, zero, label16 + j label940 +label94: + lw a3, 20(sp) + addiw a2, a1, 9 + bne a3, zero, label480 + addiw a1, a1, 12 + beq a4, zero, label96 + j label87 +label92: + lw a3, 36(sp) + addiw a2, a1, 9 + beq a3, zero, label488 + j label90 +label109: + lw a5, 36(sp) + addiw a2, a1, 12 + bne a5, zero, label470 + j label106 +label16: addiw t1, a1, 6 li t2, 6 - blt t0, t2, label210 + blt t0, t2, label940 sw a0, 28(sp) lw t2, 24(sp) - beq t2, zero, label210 + beq t2, zero, label940 addiw t1, a1, 7 li t2, 7 - blt t0, t2, label210 + blt t0, t2, label940 sw a0, 32(sp) lw t2, 28(sp) - beq t2, zero, label210 + beq t2, zero, label940 addiw t1, a1, 8 li t2, 8 - blt t0, t2, label210 + blt t0, t2, label940 sw a0, 36(sp) lw t2, 32(sp) - beq t2, zero, label210 + beq t2, zero, label940 addiw t1, a1, 9 li t2, 9 - blt t0, t2, label210 + blt t0, t2, label940 sw a0, 40(sp) lw t2, 36(sp) - beq t2, zero, label210 - addiw t1, a1, 10 - li t3, 10 - bge t0, t3, label714 - mv t2, zero - j label139 -label34: - lw a3, 20(sp) - addiw a2, a1, 9 - beq a3, zero, label35 - j label258 -label631: - mv a1, t1 - addiw t0, t0, 1 - blt t0, a3, label2 - j label959 -label625: - mv a1, t1 - addiw t0, t0, 1 - blt t0, a3, label2 - j label959 -label43: - lw a5, 64(sp) - addiw a2, a1, 12 - addiw t0, a1, 15 - mv a1, t0 - bne a5, zero, label25 - mv a1, a2 - j label25 -label35: - addiw a1, a1, 12 - beq a4, zero, label36 - j label27 -label494: + bne t2, zero, label24 + j label940 +label716: sw zero, 44(sp) lw t1, 40(sp) -label80: - beq t1, zero, label82 -label502: - mv a1, t3 - j label14 -label82: +label140: + bne t1, zero, label724 addiw t2, a1, 22 li t3, 11 - bge t0, t3, label506 + bge t0, t3, label728 mv t1, a0 - j label84 -label714: + j label144 +label24: + addiw t1, a1, 10 + li t3, 10 + bge t0, t3, label308 + mv t2, zero + j label25 +label308: sw a0, 44(sp) lw t2, 40(sp) -label139: - beq t2, zero, label210 - addiw t1, a1, 11 - li t3, 11 - bge t0, t3, label723 - mv t2, zero -label142: - beq t2, zero, label210 - addiw t1, a1, 12 - li t3, 12 - blt t0, t3, label733 - sw a0, 52(sp) - lw t2, 48(sp) -label146: - beq t2, zero, label210 - addiw t1, a1, 13 - li t3, 13 - blt t0, t3, label745 - sw a0, 56(sp) - lw t2, 52(sp) -label149: - bne t2, zero, label151 - j label210 -label506: +label25: + bne t2, zero, label27 + j label940 +label728: sw zero, 48(sp) lw t1, 44(sp) -label84: - bne t1, zero, label418 +label144: + bne t1, zero, label640 addiw t3, a1, 24 li t2, 12 - bge t0, t2, label518 + bge t0, t2, label740 mv t1, a0 -label87: - bne t1, zero, label502 +label147: + bne t1, zero, label724 addiw t2, a1, 26 li t3, 13 - blt t0, t3, label528 + blt t0, t3, label750 sw zero, 56(sp) lw t1, 52(sp) - j label91 -label151: - addiw t1, a1, 14 - li t3, 14 - blt t0, t3, label754 - sw a0, 60(sp) - lw t2, 56(sp) -label152: - beq t2, zero, label210 - addiw t1, a1, 15 - li t3, 15 - blt t0, t3, label763 - sw a0, 64(sp) - lw t2, 60(sp) -label155: - beq t2, zero, label210 - addiw t1, a1, 16 - li t3, 16 - blt t0, t3, label772 - sw a0, 68(sp) - lw t2, 64(sp) -label158: - bne t2, zero, label160 - j label210 -label528: + j label151 +label750: mv t1, a0 -label91: - bne t1, zero, label418 +label151: + bne t1, zero, label640 addiw t3, a1, 28 li t2, 14 - blt t0, t2, label540 + blt t0, t2, label762 sw zero, 60(sp) lw t1, 56(sp) - j label95 -label540: + j label155 +label762: mv t1, a0 -label95: - bne t1, zero, label502 +label155: + bne t1, zero, label724 addiw t2, a1, 30 li t3, 15 - bge t0, t3, label551 + blt t0, t3, label774 + sw zero, 64(sp) + lw t1, 60(sp) + j label158 +label774: mv t1, a0 -label98: - bne t1, zero, label418 +label158: + bne t1, zero, label640 addiw t3, a1, 32 li t2, 16 - bge t0, t2, label560 + blt t0, t2, label783 + sw zero, 68(sp) + lw t1, 64(sp) + j label162 +label783: mv t1, a0 -label102: - bne t1, zero, label502 - addiw t2, a1, 34 - li t3, 17 - bge t0, t3, label572 +label162: + bne t1, zero, label724 + addiw t4, a1, 34 + li t2, 17 + blt t0, t2, label795 + sw zero, 72(sp) + lw t1, 68(sp) + j label166 +label795: mv t1, a0 -label106: - bne t1, zero, label418 - addiw t3, a1, 36 - li t2, 18 - blt t0, t2, label585 - sw zero, 76(sp) - lw t1, 72(sp) - j label110 -label585: +label166: + beq t1, zero, label168 + mv a1, t4 + j label74 +label168: + addiw t2, a1, 36 + li t3, 18 + bge t0, t3, label806 mv t1, a0 -label110: - bne t1, zero, label502 - addiw t2, a1, 38 - li t3, 19 - bge t0, t3, label596 +label170: + bne t1, zero, label640 + addiw t3, a1, 38 + li t2, 19 + blt t0, t2, label819 + sw zero, 80(sp) + lw t1, 76(sp) + j label174 +label819: mv t1, a0 -label114: - bne t1, zero, label418 +label174: + bne t1, zero, label724 addiw a1, a1, 40 - blt t0, a3, label14 + blt t0, a3, label74 sw zero, 84(sp) - j label14 -label160: + j label74 +label27: + addiw t1, a1, 11 + li t3, 11 + bge t0, t3, label317 + mv t2, zero +label28: + beq t2, zero, label940 + addiw t1, a1, 12 + li t3, 12 + blt t0, t3, label327 + sw a0, 52(sp) + lw t2, 48(sp) + j label32 +label327: + mv t2, zero +label32: + beq t2, zero, label940 + addiw t1, a1, 13 + li t3, 13 + bge t0, t3, label338 + mv t2, zero + j label35 +label338: + sw a0, 56(sp) + lw t2, 52(sp) +label35: + beq t2, zero, label940 + addiw t1, a1, 14 + li t3, 14 + blt t0, t3, label348 + sw a0, 60(sp) + lw t2, 56(sp) +label38: + beq t2, zero, label940 + addiw t1, a1, 15 + li t3, 15 + bge t0, t3, label356 + mv t2, zero +label41: + beq t2, zero, label940 + addiw t1, a1, 16 + li t3, 16 + bge t0, t3, label365 + mv t2, zero +label44: + beq t2, zero, label940 addiw t1, a1, 17 li t3, 17 - bge t0, t3, label780 + bge t0, t3, label374 mv t2, zero - j label161 -label780: - sw a0, 72(sp) - lw t2, 68(sp) -label161: - beq t2, zero, label210 +label47: + beq t2, zero, label940 addiw t1, a1, 18 li t3, 18 - bge t0, t3, label789 + bge t0, t3, label383 mv t2, zero - j label165 -label789: + j label51 +label383: sw a0, 76(sp) lw t2, 72(sp) -label165: - beq t2, zero, label210 +label51: + beq t2, zero, label940 addiw t1, a1, 19 li t3, 19 - bge t0, t3, label801 - mv t2, zero -label168: - beq t2, zero, label210 - addiw a1, a1, 20 - blt t0, a3, label7 - sw a0, 84(sp) - j label7 -label733: + bge t0, t3, label395 mv t2, zero - j label146 -label801: + j label54 +label395: sw a0, 80(sp) lw t2, 76(sp) - j label168 -label572: - sw zero, 72(sp) - lw t1, 68(sp) - j label106 -label560: - sw zero, 68(sp) - lw t1, 64(sp) - j label102 -label551: - sw zero, 64(sp) - lw t1, 60(sp) - j label98 -label772: - mv t2, zero - j label158 -label763: - mv t2, zero - j label155 -label754: - mv t2, zero - j label152 -label518: +label54: + beq t2, zero, label940 + addiw a1, a1, 20 + blt t0, a3, label67 + sw a0, 84(sp) + j label67 +label374: + sw a0, 72(sp) + lw t2, 68(sp) + j label47 +label317: + sw a0, 48(sp) + lw t2, 44(sp) + j label28 +label740: sw zero, 52(sp) lw t1, 48(sp) - j label87 -label745: + j label147 +label356: + sw a0, 64(sp) + lw t2, 60(sp) + j label41 +label348: mv t2, zero - j label149 -label723: - sw a0, 48(sp) - lw t2, 44(sp) - j label142 -label59: - lw a5, 8(sp) - j label56 -label596: - sw zero, 80(sp) - lw t1, 76(sp) - j label114 + j label38 +label806: + sw zero, 76(sp) + lw t1, 72(sp) + j label170 +label365: + sw a0, 68(sp) + lw t2, 64(sp) + j label44 +label724: + mv a1, t3 + j label74 diff --git a/tests/SysY2022/hidden_functional/39_fp_params.riscv.s b/tests/SysY2022/hidden_functional/39_fp_params.riscv.s index 093fd6ca4..2130c8bfa 100644 --- a/tests/SysY2022/hidden_functional/39_fp_params.riscv.s +++ b/tests/SysY2022/hidden_functional/39_fp_params.riscv.s @@ -1151,7 +1151,7 @@ label5: .globl main main: # stack usage: CalleeArg[892] Local[772] RegSpill[404] CalleeSaved[152] - addi sp, sp, -1776 + addi sp, sp, -1768 sd ra, 392(sp) fsw f20, 400(sp) sd s8, 408(sp) @@ -1180,8 +1180,8 @@ main: jal getint li s2, 40 mv s3, zero - addi s0, sp, 1224 - sw a0, 896(sp) + addi s0, sp, 1216 + sw a0, 888(sp) mv s1, s0 li s0, 24 .p2align 2 @@ -1189,229 +1189,229 @@ label1246: mv a0, s1 jal getfarray addiw a1, s3, 1 - bge a1, s2, label1249 + bge a1, s2, label1266 addi s1, s1, 12 mv s3, a1 j label1246 -label1249: - addi s1, sp, 920 +label1266: + addi s1, sp, 912 mv s2, zero .p2align 2 -label1250: +label1251: mv a0, s1 jal getarray addiw a1, s2, 1 - bge a1, s0, label1254 + bge a1, s0, label1255 addi s1, s1, 12 mv s2, a1 - j label1250 -label1254: - lw s6, 896(sp) - addi s0, sp, 1224 - addi s2, sp, 896 - sh2add a1, s6, s0 + j label1251 +label1255: + lw s6, 888(sp) + addi s0, sp, 1216 + addi s2, sp, 888 addi a0, s0, 12 + addi a3, s0, 24 + sh2add a1, s6, s0 sh2add a2, s6, a0 + sh2add a0, s6, a3 flw f10, 0(a1) - addi a1, s0, 24 - fsw f10, 1760(sp) - sh2add a0, s6, a1 + addi a1, s0, 36 + fsw f10, 1752(sp) flw f25, 0(a2) flw f12, 0(a0) - addi a2, s0, 36 + sh2add a2, s6, a1 addi a0, s0, 48 - sh2add a1, s6, a2 - fsw f12, 1764(sp) - sh2add a2, s6, a0 - flw f14, 0(a1) addi a1, s0, 60 - fsw f14, 644(sp) + fsw f12, 1756(sp) + sh2add a3, s6, a0 + flw f14, 0(a2) sh2add a0, s6, a1 - flw f0, 0(a2) addi a2, s0, 72 - fsw f0, 1768(sp) + addi a1, s0, 84 + fsw f14, 1764(sp) + flw f0, 0(a3) sh2add a3, s6, a2 - flw f24, 0(a0) + fsw f0, 1760(sp) addi a2, s0, 96 + flw f24, 0(a0) + sh2add a0, s6, a1 flw f16, 0(a3) - addi a0, s0, 84 + sh2add a1, s6, a2 addi a3, s0, 108 + fsw f16, 600(sp) + sh2add a2, s6, a3 + flw f8, 0(a0) + addi a3, s0, 132 + flw f23, 0(a1) + addi a0, s0, 120 + flw f1, 0(a2) sh2add a1, s6, a0 - fsw f16, 608(sp) - sh2add a0, s6, a2 - flw f8, 0(a1) - sh2add a1, s6, a3 - flw f23, 0(a0) + sh2add a2, s6, a3 + fsw f1, 1748(sp) addi a3, s0, 156 - addi a0, s0, 120 - flw f1, 0(a1) - sh2add a2, s6, a0 - addi a1, s0, 132 - fsw f1, 1756(sp) + flw f15, 0(a1) + addi a1, s0, 144 + fsw f15, 1744(sp) sh2add a0, s6, a1 - flw f15, 0(a2) - addi a2, s0, 144 - fsw f15, 1752(sp) - sh2add a1, s6, a2 - flw f16, 0(a0) - sh2add a0, s6, a3 - fsw f16, 1748(sp) - flw f10, 0(a1) - addi a1, s0, 168 + flw f16, 0(a2) + sh2add a1, s6, a3 + addi a2, s0, 168 + addi a3, s0, 180 + fsw f16, 1740(sp) + flw f10, 0(a0) + sh2add a0, s6, a2 fsw f10, 584(sp) - sh2add a2, s6, a1 - flw f17, 0(a0) - addi a0, s0, 180 - fsw f17, 1744(sp) - sh2add a3, s6, a0 - flw f10, 0(a2) - addi a0, s0, 204 addi a2, s0, 192 - fsw f10, 1732(sp) - sh2add a1, s6, a2 - flw f12, 0(a3) - addi a2, s0, 216 - sh2add a3, s6, a0 - fsw f12, 1740(sp) - addi a0, s0, 228 - flw f10, 0(a1) - sh2add a1, s6, a2 - fsw f10, 592(sp) - sh2add a2, s6, a0 - flw f27, 0(a3) - flw f11, 0(a1) - addi a1, s0, 240 - fsw f11, 1728(sp) + flw f17, 0(a1) + sh2add a1, s6, a3 + fsw f17, 1736(sp) + addi a3, s0, 204 + flw f10, 0(a0) + sh2add a0, s6, a2 + fsw f10, 1724(sp) + sh2add a2, s6, a3 + flw f12, 0(a1) + addi a3, s0, 228 + addi a1, s0, 216 + fsw f12, 1732(sp) + flw f10, 0(a0) sh2add a0, s6, a1 - flw f10, 0(a2) - addi a2, s0, 252 - fsw f10, 1720(sp) - sh2add a1, s6, a2 + fsw f10, 588(sp) + sh2add a1, s6, a3 + flw f27, 0(a2) + flw f11, 0(a0) + addi a2, s0, 240 + sh2add a0, s6, a2 + fsw f11, 1720(sp) + flw f10, 0(a1) + addi a1, s0, 252 + fsw f10, 1712(sp) + sh2add a3, s6, a1 flw f12, 0(a0) + addi a1, s0, 276 addi a0, s0, 264 - fsw f12, 660(sp) + fsw f12, 648(sp) sh2add a2, s6, a0 - flw f11, 0(a1) - addi a1, s0, 276 - fsw f11, 588(sp) + flw f11, 0(a3) sh2add a0, s6, a1 + addi a3, s0, 300 + fsw f11, 656(sp) flw f21, 0(a2) - addi a1, s0, 300 flw f22, 0(a0) addi a2, s0, 288 - sh2add a0, s6, a1 - sh2add a3, s6, a2 - flw f10, 0(a3) - addi a2, s0, 312 + sh2add a0, s6, a3 sh2add a1, s6, a2 - fsw f10, 1716(sp) + addi a3, s0, 396 + flw f10, 0(a1) + addi a1, s0, 312 + fsw f10, 1708(sp) + sh2add a2, s6, a1 flw f20, 0(a0) - flw f12, 0(a1) + flw f12, 0(a2) addi a0, s0, 324 - addi a1, s0, 336 - sh2add a2, s6, a0 + addi a2, s0, 336 + sh2add a1, s6, a0 fsw f12, 580(sp) - sh2add a0, s6, a1 - flw f11, 0(a2) - addi a2, s0, 348 - fsw f11, 1724(sp) - sh2add a1, s6, a2 + sh2add a0, s6, a2 + flw f11, 0(a1) + addi a1, s0, 348 + fsw f11, 1716(sp) + sh2add a2, s6, a1 flw f15, 0(a0) addi a0, s0, 360 - fsw f15, 1736(sp) - sh2add a2, s6, a0 - flw f10, 0(a1) - addi a0, s0, 384 - addi a1, s0, 372 - fsw f10, 664(sp) - sh2add a3, s6, a1 - flw f26, 0(a2) + fsw f15, 1728(sp) + sh2add a1, s6, a0 + flw f10, 0(a2) + addi a2, s0, 372 + fsw f10, 652(sp) + sh2add a0, s6, a2 + flw f26, 0(a1) + flw f18, 0(a0) + addi a1, s0, 384 + sh2add a0, s6, a3 + sh2add a2, s6, a1 + addi a3, s0, 444 + flw f9, 0(a2) + addi a1, s0, 408 + flw f10, 0(a0) + sh2add a2, s6, a1 + addi a0, s0, 420 + fsw f10, 632(sp) sh2add a1, s6, a0 - flw f18, 0(a3) - addi a2, s0, 396 - addi a0, s0, 408 - sh2add a3, s6, a2 - flw f9, 0(a1) - sh2add a2, s6, a0 - flw f10, 0(a3) - addi a1, s0, 420 - sh2add a0, s6, a1 - fsw f10, 640(sp) flw f11, 0(a2) addi a2, s0, 432 - fsw f11, 652(sp) - sh2add a1, s6, a2 - flw f19, 0(a0) + fsw f11, 640(sp) + sh2add a0, s6, a2 + flw f19, 0(a1) addi a2, s0, 456 - addi a0, s0, 444 - flw f10, 0(a1) - sh2add a3, s6, a0 - addi a1, s0, 468 + sh2add a1, s6, a3 + flw f10, 0(a0) + addi a3, s0, 468 sh2add a0, s6, a2 - fsw f10, 656(sp) - sh2add a2, s6, a1 - flw f16, 0(a3) + fsw f10, 644(sp) + flw f16, 0(a1) + sh2add a1, s6, a3 fsw f16, 576(sp) flw f10, 0(a0) - fsw f10, 624(sp) - flw f11, 0(a2) - fsw f11, 648(sp) + fsw f10, 616(sp) + flw f11, 0(a1) + fsw f11, 636(sp) fsw f23, 0(sp) fsw f1, 8(sp) - flw f15, 1752(sp) + flw f15, 1744(sp) fsw f15, 16(sp) - flw f16, 1748(sp) + flw f16, 1740(sp) fsw f16, 24(sp) flw f10, 584(sp) fsw f10, 32(sp) fsw f17, 40(sp) - flw f10, 1732(sp) + flw f10, 1724(sp) fsw f10, 48(sp) - flw f12, 1740(sp) + flw f12, 1732(sp) fsw f12, 56(sp) - flw f10, 592(sp) + flw f10, 588(sp) fsw f10, 64(sp) fsw f27, 72(sp) - flw f11, 1728(sp) + flw f11, 1720(sp) fsw f11, 80(sp) - flw f10, 1720(sp) + flw f10, 1712(sp) fsw f10, 88(sp) - flw f12, 660(sp) + flw f12, 648(sp) fsw f12, 96(sp) - flw f11, 588(sp) + flw f11, 656(sp) fsw f11, 104(sp) fsw f21, 112(sp) fsw f22, 120(sp) - flw f10, 1716(sp) + flw f10, 1708(sp) fsw f10, 128(sp) fsw f20, 136(sp) flw f12, 580(sp) fsw f12, 144(sp) - flw f11, 1724(sp) + flw f11, 1716(sp) fsw f11, 152(sp) - flw f15, 1736(sp) + flw f15, 1728(sp) fsw f15, 160(sp) - flw f10, 664(sp) + flw f10, 652(sp) fsw f10, 168(sp) fsw f26, 176(sp) fsw f18, 184(sp) fsw f9, 192(sp) - flw f10, 640(sp) + flw f10, 632(sp) fsw f10, 200(sp) - flw f11, 652(sp) + flw f11, 640(sp) fsw f11, 208(sp) fsw f19, 216(sp) - flw f10, 656(sp) + flw f10, 644(sp) fsw f10, 224(sp) flw f16, 576(sp) fsw f16, 232(sp) - flw f10, 624(sp) + flw f10, 616(sp) fsw f10, 240(sp) - flw f11, 648(sp) + flw f11, 636(sp) fsw f11, 248(sp) - flw f10, 1760(sp) - flw f12, 1764(sp) - flw f16, 608(sp) + flw f10, 1752(sp) + flw f12, 1756(sp) + flw f16, 600(sp) fmv.s f11, f25 fmv.s f13, f14 fmv.s f14, f0 @@ -1419,183 +1419,183 @@ label1254: fmv.s f17, f8 mv a0, s2 jal params_f40 - addi s1, sp, 920 - fsw f10, 1712(sp) - addi a3, s1, 24 - addi a2, s1, 276 - addi t1, s1, 240 - sh2add a1, s6, a2 - lw a0, 0(a1) - sh2add a2, s6, a3 - addi a1, s1, 72 - sd a0, 848(sp) - lw s4, 0(a2) - sh2add a0, s6, a1 - lw a2, 0(a0) - addi a1, s1, 12 - sh2add a0, s6, a1 - addi a1, s1, 48 - sd a2, 856(sp) - lw s5, 0(a0) - addi a2, s1, 264 - sh2add a0, s6, a1 - addi a1, s1, 60 - lw s3, 0(a0) - sh2add a0, s6, a1 - sh2add a1, s6, a2 - lw a7, 0(a0) - addi a2, s1, 228 - sh2add a0, s6, s1 - sd a7, 672(sp) - lw a6, 0(a1) - lw t0, 0(a0) + addi s1, sp, 912 + fsw f10, 1704(sp) + addi a2, s1, 24 + addi a1, s1, 276 + sh2add a3, s6, a1 + lw a0, 0(a3) sh2add a1, s6, a2 - addi a0, s1, 84 - addi a2, s1, 120 - sd t0, 680(sp) - lw a4, 0(a1) + sd a0, 840(sp) + lw s4, 0(a1) + addi a0, s1, 72 + sh2add a1, s6, a0 + lw a2, 0(a1) + addi a0, s1, 12 sh2add a1, s6, a0 + addi a0, s1, 48 + sd a2, 848(sp) + lw s5, 0(a1) + addi a2, s1, 60 + sh2add a1, s6, a0 + lw s3, 0(a1) sh2add a0, s6, a2 - sd a4, 864(sp) - addi a2, s1, 156 - lw a4, 0(a1) - sh2add a1, s6, a2 - addi a2, s1, 204 - sd a4, 880(sp) - lw a5, 0(a0) - addi a0, s1, 96 - sd a5, 1208(sp) + addi a1, s1, 264 + addi a2, s1, 120 + lw a7, 0(a0) + sh2add a0, s6, a1 + sh2add a1, s6, s1 + sd a7, 664(sp) + lw a6, 0(a0) + addi a0, s1, 228 + lw t0, 0(a1) + sh2add a1, s6, a0 + addi a0, s1, 84 + sd t0, 672(sp) lw a4, 0(a1) - addi a5, s1, 192 sh2add a1, s6, a0 sh2add a0, s6, a2 - sd a4, 1216(sp) + sd a4, 856(sp) addi a2, s1, 144 - lw t6, 0(a1) + lw a4, 0(a1) sh2add a3, s6, a2 + addi a1, s1, 156 + sd a4, 872(sp) + lw a5, 0(a0) + sh2add a0, s6, a1 + addi a1, s1, 96 + sd a5, 1200(sp) + lw a4, 0(a0) + addi a5, s1, 36 + sh2add a0, s6, a1 + addi a1, s1, 204 + sd a4, 1208(sp) + lw t6, 0(a0) + sh2add a0, s6, a1 addi a1, s1, 216 - sd t6, 688(sp) + sd t6, 680(sp) lw t0, 0(a0) sh2add a0, s6, a1 addi a1, s1, 108 - sd t0, 696(sp) - addi t0, s1, 36 + sd t0, 688(sp) + addi t0, s1, 192 lw a4, 0(a0) sh2add a0, s6, a1 addi a1, s1, 132 - sd a4, 704(sp) - sh2add a4, s6, a1 + sd a4, 696(sp) lw t4, 0(a0) - sh2add a1, s6, a5 - sh2add a5, s6, t0 - sd t4, 600(sp) + sh2add a4, s6, a1 + sh2add a1, s6, t0 + sh2add t0, s6, a5 + sd t4, 592(sp) lw a0, 0(a3) lw a2, 0(a4) lw a3, 0(a1) - lw a4, 0(a5) + lw a4, 0(t0) addi a1, s1, 252 - sh2add t0, s6, a1 - sh2add a1, s6, t1 - sd a4, 712(sp) - lw a5, 0(t0) - sd a5, 616(sp) + addi t0, s1, 240 + sh2add t1, s6, a1 + sh2add a1, s6, t0 + sd a4, 704(sp) + lw a5, 0(t1) + sd a5, 608(sp) lw a4, 0(a1) addi a5, s1, 180 sh2add t0, s6, a5 - sd a4, 720(sp) - addi a4, s1, 168 + sd a4, 712(sp) lw a1, 0(t0) + addi a4, s1, 168 sh2add a5, s6, a4 lw t1, 0(a5) - sd t1, 632(sp) - ld a4, 864(sp) + sd t1, 624(sp) + ld a4, 856(sp) sd a4, 0(sp) flw f10, 584(sp) fsw f10, 8(sp) fsw f25, 16(sp) - flw f16, 1748(sp) + flw f16, 1740(sp) fsw f16, 24(sp) - flw f10, 624(sp) + flw f10, 616(sp) fsw f10, 32(sp) - flw f16, 608(sp) + flw f16, 600(sp) fsw f16, 40(sp) - ld a4, 880(sp) + ld a4, 872(sp) sd a4, 48(sp) fsw f9, 56(sp) - ld a5, 1208(sp) + ld a5, 1200(sp) sd a5, 64(sp) - ld a4, 1216(sp) + ld a4, 1208(sp) sd a4, 72(sp) - flw f12, 660(sp) + flw f12, 648(sp) fsw f12, 80(sp) - flw f10, 640(sp) + flw f10, 632(sp) fsw f10, 88(sp) fsw f22, 96(sp) - flw f1, 1756(sp) + flw f1, 1748(sp) fsw f1, 104(sp) fsw f20, 112(sp) sd t6, 120(sp) - flw f11, 648(sp) + flw f11, 636(sp) fsw f11, 128(sp) - ld t0, 696(sp) + ld t0, 688(sp) sd t0, 136(sp) - flw f11, 588(sp) + flw f11, 656(sp) fsw f11, 144(sp) - flw f10, 592(sp) + flw f10, 588(sp) fsw f10, 152(sp) fsw f24, 160(sp) - flw f11, 652(sp) + flw f11, 640(sp) fsw f11, 168(sp) - ld a4, 704(sp) + ld a4, 696(sp) sd a4, 176(sp) sd t4, 184(sp) - flw f10, 1732(sp) + flw f10, 1724(sp) fsw f10, 192(sp) - flw f15, 1752(sp) + flw f15, 1744(sp) fsw f15, 200(sp) - flw f10, 1760(sp) + flw f10, 1752(sp) fsw f10, 208(sp) sd a0, 216(sp) fsw f18, 224(sp) sd a2, 232(sp) sd a3, 240(sp) - flw f11, 1724(sp) + flw f11, 1716(sp) fsw f11, 248(sp) - flw f10, 1716(sp) + flw f10, 1708(sp) fsw f10, 256(sp) - flw f17, 1744(sp) + flw f17, 1736(sp) fsw f17, 264(sp) - flw f10, 664(sp) + flw f10, 652(sp) fsw f10, 272(sp) - ld a4, 712(sp) + ld a4, 704(sp) sd a4, 280(sp) - ld a5, 616(sp) + ld a5, 608(sp) sd a5, 288(sp) - ld a4, 720(sp) + ld a4, 712(sp) sd a4, 296(sp) - flw f11, 1728(sp) + flw f11, 1720(sp) fsw f11, 304(sp) - flw f10, 1720(sp) + flw f10, 1712(sp) fsw f10, 312(sp) fsw f21, 320(sp) flw f12, 580(sp) fsw f12, 328(sp) - flw f10, 656(sp) + flw f10, 644(sp) fsw f10, 336(sp) fsw f27, 344(sp) sd a1, 352(sp) - flw f12, 1764(sp) + flw f12, 1756(sp) fsw f12, 360(sp) sd t1, 368(sp) fsw f19, 376(sp) sd s2, 384(sp) - ld t0, 680(sp) - ld a0, 848(sp) - ld a2, 856(sp) - flw f15, 1736(sp) - flw f12, 1740(sp) - flw f0, 1768(sp) - flw f14, 644(sp) + ld t0, 672(sp) + ld a0, 840(sp) + ld a2, 848(sp) + flw f15, 1728(sp) + flw f12, 1732(sp) + flw f0, 1760(sp) + flw f14, 1764(sp) flw f16, 576(sp) mv a1, s4 fmv.s f10, f0 @@ -1607,226 +1607,226 @@ label1254: mv a7, t0 fmv.s f17, f26 jal params_f40_i24 - addi s6, s0, 120 - addi t6, s0, 108 - addi a7, s0, 84 + addi t6, s0, 444 + addi t1, s0, 108 + fmv.s f8, f10 + addi t5, s0, 132 + addi s6, s0, 372 addi a2, s0, 12 addi t0, s0, 24 - addi a3, s0, 36 - addi t3, s0, 288 - addi a4, s0, 48 - addi t4, s0, 132 + addi a0, s0, 156 + addi s10, s0, 384 + addi a1, s0, 180 addi a5, s0, 60 + addi t2, s0, 432 + addi s7, s0, 228 addi a6, s0, 72 - addi t2, s0, 420 - addi s8, s0, 216 - addi t1, s0, 228 - addi a1, s0, 168 - fmv.s f8, f10 + addi a7, s0, 84 + addi a3, s0, 36 + addi a4, s0, 48 addi s11, s0, 96 - addi s9, s0, 276 - addi a0, s0, 432 - addi s7, s0, 264 - addi s10, s0, 444 - addi t5, s0, 456 - sd a2, 728(sp) - sd t0, 912(sp) - addi a2, s0, 324 - sd a3, 904(sp) - addi t0, s0, 180 - sd a4, 872(sp) - addi a3, s0, 300 - sd a5, 736(sp) - addi a4, s0, 372 - addi a5, s0, 336 - sd a6, 840(sp) - sd a7, 832(sp) - addi a6, s0, 156 - sd s11, 824(sp) - addi a7, s0, 384 - addi s11, s0, 144 + addi t3, s0, 264 + addi t4, s0, 276 + addi s8, s0, 300 + addi s9, s0, 288 + sd a2, 720(sp) + sd t0, 904(sp) + addi a2, s0, 168 + sd a3, 896(sp) + addi t0, s0, 420 + sd a4, 864(sp) + addi a3, s0, 120 + sd a5, 728(sp) + addi a4, s0, 216 + sd a6, 832(sp) + addi a5, s0, 456 + sd a7, 824(sp) + addi a6, s0, 336 sd s11, 816(sp) + addi a7, s0, 324 + addi s11, s0, 144 + sd s11, 808(sp) addi s11, s0, 192 - sd s11, 800(sp) + sd s11, 792(sp) addi s11, s0, 204 - sd s11, 808(sp) + sd s11, 800(sp) addi s11, s0, 240 - sd s11, 792(sp) - addi s11, s0, 252 sd s11, 784(sp) - addi s11, s0, 312 + addi s11, s0, 252 sd s11, 776(sp) - addi s11, s0, 348 + addi s11, s0, 312 sd s11, 768(sp) + addi s11, s0, 348 + sd s11, 760(sp) addi s11, s0, 360 - sd s11, 744(sp) + sd s11, 736(sp) addi s11, s0, 396 - sd s11, 752(sp) + sd s11, 744(sp) addi s11, s0, 408 - sd s11, 760(sp) + sd s11, 752(sp) addi s11, s0, 468 - sd s11, 1704(sp) - ld s11, 824(sp) - sd s11, 0(sp) - sd t6, 8(sp) - sd s6, 16(sp) - sd t4, 24(sp) + sd s11, 1696(sp) ld s11, 816(sp) + sd s11, 0(sp) + sd t1, 8(sp) + sd a3, 16(sp) + sd t5, 24(sp) + ld s11, 808(sp) sd s11, 32(sp) - sd a6, 40(sp) - sd a1, 48(sp) - sd t0, 56(sp) - ld s11, 800(sp) + sd a0, 40(sp) + sd a2, 48(sp) + sd a1, 56(sp) + ld s11, 792(sp) sd s11, 64(sp) - ld s11, 808(sp) + ld s11, 800(sp) sd s11, 72(sp) - sd s8, 80(sp) - sd t1, 88(sp) - ld s11, 792(sp) - sd s11, 96(sp) + sd a4, 80(sp) + sd s7, 88(sp) ld s11, 784(sp) - sd s11, 104(sp) - sd s7, 112(sp) - sd s9, 120(sp) - sd t3, 128(sp) - sd a3, 136(sp) + sd s11, 96(sp) ld s11, 776(sp) - sd s11, 144(sp) - sd a2, 152(sp) - sd a5, 160(sp) + sd s11, 104(sp) + sd t3, 112(sp) + sd t4, 120(sp) + sd s9, 128(sp) + sd s8, 136(sp) ld s11, 768(sp) + sd s11, 144(sp) + sd a7, 152(sp) + sd a6, 160(sp) + ld s11, 760(sp) sd s11, 168(sp) - ld s11, 744(sp) + ld s11, 736(sp) sd s11, 176(sp) - sd a4, 184(sp) - sd a7, 192(sp) - ld s11, 752(sp) + sd s6, 184(sp) + sd s10, 192(sp) + ld s11, 744(sp) sd s11, 200(sp) - ld s11, 760(sp) + ld s11, 752(sp) sd s11, 208(sp) - sd t2, 216(sp) - sd a0, 224(sp) - sd s10, 232(sp) - sd t5, 240(sp) - ld s11, 1704(sp) + sd t0, 216(sp) + sd t2, 224(sp) + sd t6, 232(sp) + sd a5, 240(sp) + ld s11, 1696(sp) sd s11, 248(sp) sd s2, 256(sp) - ld a2, 728(sp) - ld a5, 736(sp) - ld a7, 832(sp) - ld a6, 840(sp) - ld a4, 872(sp) - ld a3, 904(sp) - ld t0, 912(sp) + ld a2, 720(sp) + ld a5, 728(sp) + ld a7, 824(sp) + ld a6, 832(sp) + ld a4, 864(sp) + ld a3, 896(sp) + ld t0, 904(sp) mv a0, s0 mv a1, a2 mv a2, t0 jal params_fa40 - addi a2, s1, 264 addi a1, s1, 276 - fmv.s f23, f10 - addi t4, s1, 72 - addi t1, s1, 144 - addi a4, s1, 192 - addi a0, s1, 12 - addi t3, s1, 120 - addi t0, s1, 36 - addi a3, s1, 228 + addi a2, s1, 264 addi t5, s1, 84 + addi t0, s1, 36 addi a5, s1, 180 + addi t4, s1, 72 addi t2, s1, 132 - sd t0, 888(sp) - ld a6, 840(sp) + addi t3, s1, 120 + addi a3, s1, 228 + addi a0, s1, 12 + fmv.s f23, f10 + addi a4, s1, 192 + addi t1, s1, 144 + sd t0, 880(sp) + ld a6, 832(sp) addi t0, s1, 156 sd a6, 0(sp) sd t4, 8(sp) sd t5, 16(sp) - ld t6, 688(sp) + ld t6, 680(sp) sd t6, 24(sp) - ld a7, 832(sp) + ld a7, 824(sp) sd a7, 32(sp) - ld s11, 824(sp) - sd s11, 40(sp) ld s11, 816(sp) + sd s11, 40(sp) + ld s11, 808(sp) sd s11, 48(sp) - ld t4, 600(sp) + ld t4, 592(sp) sd t4, 56(sp) - flw f10, 1732(sp) + flw f10, 1724(sp) fsw f10, 64(sp) - flw f12, 1740(sp) + flw f12, 1732(sp) fsw f12, 72(sp) sd t3, 80(sp) - ld s11, 800(sp) + ld s11, 792(sp) sd s11, 88(sp) sd t2, 96(sp) sd t1, 104(sp) - ld s11, 808(sp) + ld s11, 800(sp) sd s11, 112(sp) - flw f11, 1728(sp) + flw f11, 1720(sp) fsw f11, 120(sp) - flw f10, 1720(sp) + flw f10, 1712(sp) fsw f10, 128(sp) sd t0, 136(sp) - ld t1, 632(sp) + ld t1, 624(sp) sd t1, 144(sp) - ld s11, 792(sp) - sd s11, 152(sp) ld s11, 784(sp) + sd s11, 152(sp) + ld s11, 776(sp) sd s11, 160(sp) fsw f21, 168(sp) fsw f22, 176(sp) sd a5, 184(sp) sd a4, 192(sp) - ld t0, 696(sp) + ld t0, 688(sp) sd t0, 200(sp) - ld a4, 704(sp) + ld a4, 696(sp) sd a4, 208(sp) - flw f10, 1716(sp) + flw f10, 1708(sp) fsw f10, 216(sp) fsw f20, 224(sp) sd a3, 232(sp) - ld a4, 720(sp) + ld a4, 712(sp) sd a4, 240(sp) - ld s11, 776(sp) + ld s11, 768(sp) sd s11, 248(sp) - ld a5, 616(sp) + ld a5, 608(sp) sd a5, 256(sp) sd a2, 264(sp) sd a1, 272(sp) - flw f11, 1724(sp) + flw f11, 1716(sp) fsw f11, 280(sp) - flw f15, 1736(sp) + flw f15, 1728(sp) fsw f15, 288(sp) - ld s11, 768(sp) + ld s11, 760(sp) sd s11, 296(sp) - ld t0, 680(sp) + ld t0, 672(sp) sd t0, 304(sp) sd a0, 312(sp) - ld s11, 744(sp) + ld s11, 736(sp) sd s11, 320(sp) fsw f18, 328(sp) sd s4, 336(sp) fsw f9, 344(sp) - ld s11, 752(sp) + ld s11, 744(sp) sd s11, 352(sp) - ld s11, 760(sp) + ld s11, 752(sp) sd s11, 360(sp) fsw f19, 368(sp) - ld a4, 712(sp) + ld a4, 704(sp) sd a4, 376(sp) sd s2, 384(sp) - ld a7, 672(sp) - ld a2, 728(sp) - ld a5, 736(sp) - ld t0, 888(sp) - flw f17, 1744(sp) - flw f16, 1748(sp) - flw f15, 1752(sp) - flw f1, 1756(sp) - flw f10, 1760(sp) - flw f12, 1764(sp) - flw f0, 1768(sp) - flw f14, 644(sp) + ld a7, 664(sp) + ld a2, 720(sp) + ld a5, 728(sp) + ld t0, 880(sp) + flw f17, 1736(sp) + flw f16, 1740(sp) + flw f15, 1744(sp) + flw f1, 1748(sp) + flw f10, 1752(sp) + flw f12, 1756(sp) + flw f0, 1760(sp) + flw f14, 1764(sp) mv a0, s1 mv a1, s5 fmv.s f11, f12 @@ -1838,7 +1838,7 @@ label1254: mv a6, s3 fmv.s f14, f1 jal params_mix - flw f10, 1712(sp) + flw f10, 1704(sp) mv s0, a0 jal putfloat li a0, 10 @@ -1855,8 +1855,8 @@ label1254: jal putint li a0, 10 jal putch - mv a0, zero ld ra, 392(sp) + mv a0, zero flw f20, 400(sp) ld s8, 408(sp) flw f22, 416(sp) @@ -1881,5 +1881,5 @@ label1254: ld s7, 552(sp) ld s4, 560(sp) ld s5, 568(sp) - addi sp, sp, 1776 + addi sp, sp, 1768 ret diff --git a/tests/SysY2022/performance/fft0.riscv.s b/tests/SysY2022/performance/fft0.riscv.s index 04807d4cb..3e1843a4d 100644 --- a/tests/SysY2022/performance/fft0.riscv.s +++ b/tests/SysY2022/performance/fft0.riscv.s @@ -481,15 +481,15 @@ power: mv s0, a0 sd s1, 16(sp) mv s1, a1 - bne a1, zero, label758 + bne a1, zero, label760 li a0, 1 -label760: +label758: ld ra, 0(sp) ld s0, 8(sp) ld s1, 16(sp) addi sp, sp, 24 ret -label758: +label760: mv a2, s1 bge s1, zero, label782 addiw a2, s1, 1 @@ -503,10 +503,10 @@ label782: lui a3, 524288 addiw a2, a3, 1 and a1, s1, a2 - bne a1, a4, label760 + bne a1, a4, label758 mv a1, s0 jal multiply - j label760 + j label758 .p2align 2 fft: addi sp, sp, -72 diff --git a/tests/SysY2022/performance/fft1.riscv.s b/tests/SysY2022/performance/fft1.riscv.s index 04807d4cb..3e1843a4d 100644 --- a/tests/SysY2022/performance/fft1.riscv.s +++ b/tests/SysY2022/performance/fft1.riscv.s @@ -481,15 +481,15 @@ power: mv s0, a0 sd s1, 16(sp) mv s1, a1 - bne a1, zero, label758 + bne a1, zero, label760 li a0, 1 -label760: +label758: ld ra, 0(sp) ld s0, 8(sp) ld s1, 16(sp) addi sp, sp, 24 ret -label758: +label760: mv a2, s1 bge s1, zero, label782 addiw a2, s1, 1 @@ -503,10 +503,10 @@ label782: lui a3, 524288 addiw a2, a3, 1 and a1, s1, a2 - bne a1, a4, label760 + bne a1, a4, label758 mv a1, s0 jal multiply - j label760 + j label758 .p2align 2 fft: addi sp, sp, -72 diff --git a/tests/SysY2022/performance/fft2.riscv.s b/tests/SysY2022/performance/fft2.riscv.s index 04807d4cb..3e1843a4d 100644 --- a/tests/SysY2022/performance/fft2.riscv.s +++ b/tests/SysY2022/performance/fft2.riscv.s @@ -481,15 +481,15 @@ power: mv s0, a0 sd s1, 16(sp) mv s1, a1 - bne a1, zero, label758 + bne a1, zero, label760 li a0, 1 -label760: +label758: ld ra, 0(sp) ld s0, 8(sp) ld s1, 16(sp) addi sp, sp, 24 ret -label758: +label760: mv a2, s1 bge s1, zero, label782 addiw a2, s1, 1 @@ -503,10 +503,10 @@ label782: lui a3, 524288 addiw a2, a3, 1 and a1, s1, a2 - bne a1, a4, label760 + bne a1, a4, label758 mv a1, s0 jal multiply - j label760 + j label758 .p2align 2 fft: addi sp, sp, -72 diff --git a/tests/SysY2022/performance/matmul1.riscv.s b/tests/SysY2022/performance/matmul1.riscv.s index cbbefb151..301b801f7 100644 --- a/tests/SysY2022/performance/matmul1.riscv.s +++ b/tests/SysY2022/performance/matmul1.riscv.s @@ -21,340 +21,355 @@ cmmc_parallel_body_payload_3: .globl main main: addi sp, sp, -88 -pcrel2037: +pcrel2255: auipc a0, %pcrel_hi(a) -pcrel2038: - auipc a1, %pcrel_hi(cmmc_parallel_body_2) +pcrel2256: + auipc a1, %pcrel_hi(cmmc_parallel_body_3) sd ra, 0(sp) sd s5, 8(sp) - addi s5, a0, %pcrel_lo(pcrel2037) + addi s5, a0, %pcrel_lo(pcrel2255) sd s0, 16(sp) -pcrel2039: +pcrel2257: auipc a0, %pcrel_hi(c) sd s7, 24(sp) -pcrel2040: +pcrel2258: auipc s7, %pcrel_hi(cmmc_parallel_body_payload_2) sd s8, 32(sp) - addi s8, s7, %pcrel_lo(pcrel2040) + addi s8, s7, %pcrel_lo(pcrel2258) sd s1, 40(sp) - addi s1, a0, %pcrel_lo(pcrel2039) + addi s1, a0, %pcrel_lo(pcrel2257) sd s6, 48(sp) -pcrel2041: - auipc a0, %pcrel_hi(cmmc_parallel_body_3) - addi s6, a1, %pcrel_lo(pcrel2038) +pcrel2259: + auipc a0, %pcrel_hi(cmmc_parallel_body_2) sd s9, 56(sp) + addi s6, a0, %pcrel_lo(pcrel2259) mv s9, zero + li a0, 125 sd s2, 64(sp) - addi s2, a0, %pcrel_lo(pcrel2041) + slli s0, a0, 5 + addi s2, a1, %pcrel_lo(pcrel2256) sd s3, 72(sp) - li a0, 125 + slli s3, s0, 1 sd s4, 80(sp) - slli s0, a0, 5 sh1add s4, s0, s0 - slli s3, s0, 1 -label1349: +label1566: li a0, 1000 - bge s9, a0, label1356 + bge s9, a0, label1571 mv a0, s5 jal getarray li a1, 1000 - bne a0, a1, label1354 + beq a0, a1, label1570 +label1599: + ld ra, 0(sp) + ld s5, 8(sp) + ld s0, 16(sp) + ld s7, 24(sp) + ld s8, 32(sp) + ld s1, 40(sp) + ld s6, 48(sp) + ld s9, 56(sp) + ld s2, 64(sp) + ld s3, 72(sp) + ld s4, 80(sp) + addi sp, sp, 88 + ret +label1570: addiw s9, s9, 1 add s5, s5, s0 - j label1349 -label1356: + j label1566 +label1571: li a0, 23 jal _sysy_starttime li a1, 1000 mv a0, zero -pcrel2042: +pcrel2260: auipc a3, %pcrel_hi(cmmc_parallel_body_0) - addi a2, a3, %pcrel_lo(pcrel2042) + addi a2, a3, %pcrel_lo(pcrel2260) jal cmmcParallelFor li a1, 1000 mv a0, zero -pcrel2043: +pcrel2261: auipc a3, %pcrel_hi(cmmc_parallel_body_1) - addi a2, a3, %pcrel_lo(pcrel2043) + addi a2, a3, %pcrel_lo(pcrel2261) jal cmmcParallelFor mv s9, zero mv s5, s1 mv a0, s1 mv a1, zero - lui a3, 524288 - addiw a2, a3, -1 - j label1360 + lui a4, 524288 + addiw a2, a4, -1 + j label1575 .p2align 2 -label1383: +label1598: addi a0, a0, 256 .p2align 2 -label1360: - ld a4, 0(a0) +label1575: + ld a3, 0(a0) addiw a1, a1, 64 - srai t1, a4, 32 - sext.w a3, a4 - min t0, a2, a3 - ld a3, 8(a0) - min a5, t0, t1 - srai t2, a3, 32 - sext.w a2, a3 - min t0, a5, a2 - ld a2, 16(a0) - min a4, t0, t2 - sext.w t1, a2 - srai t2, a2, 32 - min a5, a4, t1 - ld a4, 24(a0) - min a3, a5, t2 - srai t1, a4, 32 - sext.w t0, a4 - min a5, a3, t0 - ld a3, 32(a0) - min a2, a5, t1 - srai t0, a3, 32 - sext.w t2, a3 - min a5, a2, t2 - ld a2, 40(a0) - min a4, a5, t0 - srai t1, a2, 32 + srai t1, a3, 32 + sext.w t0, a3 + min a5, a2, t0 + ld a2, 8(a0) + min a4, a5, t1 sext.w a3, a2 + srai t1, a2, 32 min t0, a4, a3 - ld a3, 48(a0) + ld a3, 16(a0) min a5, t0, t1 srai t2, a3, 32 sext.w a4, a3 min t0, a5, a4 - ld a4, 56(a0) + ld a4, 24(a0) min a2, t0, t2 sext.w t1, a4 srai t0, a4, 32 min a5, a2, t1 - ld a2, 64(a0) + ld a2, 32(a0) min a3, a5, t0 sext.w t1, a2 srai t0, a2, 32 min a5, a3, t1 - ld a3, 72(a0) + ld a3, 40(a0) min a4, a5, t0 srai t1, a3, 32 sext.w a2, a3 min t0, a4, a2 - ld a2, 80(a0) + ld a2, 48(a0) min a5, t0, t1 sext.w a3, a2 srai t1, a2, 32 min t0, a5, a3 - ld a3, 88(a0) + ld a3, 56(a0) min a4, t0, t1 sext.w a2, a3 srai t1, a3, 32 - min a5, a4, a2 - ld a2, 96(a0) - min t0, a5, t1 + min t0, a4, a2 + ld a2, 64(a0) + min a5, t0, t1 sext.w a3, a2 srai t1, a2, 32 - min a4, t0, a3 - ld a3, 104(a0) - min a5, a4, t1 - sext.w a2, a3 - srai t1, a3, 32 - min t0, a5, a2 - ld a2, 112(a0) + min t0, a5, a3 + ld a3, 72(a0) min a4, t0, t1 - sext.w a3, a2 - srai t1, a2, 32 - min t0, a4, a3 - ld a3, 120(a0) - min a5, t0, t1 sext.w a2, a3 srai t1, a3, 32 - min t0, a5, a2 - ld a2, 128(a0) - min a4, t0, t1 - sext.w a3, a2 - srai t1, a2, 32 - min t0, a4, a3 - ld a3, 136(a0) + min t0, a4, a2 + ld a2, 80(a0) min a5, t0, t1 - sext.w a2, a3 - srai t1, a3, 32 - min a4, a5, a2 - ld a2, 144(a0) - min t0, a4, t1 + srai t2, a2, 32 sext.w a3, a2 - srai t1, a2, 32 - min a5, t0, a3 - ld a3, 152(a0) - min a4, a5, t1 - sext.w a2, a3 + min t0, a5, a3 + ld a3, 88(a0) + min a4, t0, t2 + sext.w t1, a3 + srai t0, a3, 32 + min a5, a4, t1 + ld a4, 96(a0) + min a2, a5, t0 + sext.w t1, a4 + srai t0, a4, 32 + min a5, a2, t1 + ld a2, 104(a0) + min a3, a5, t0 + sext.w t1, a2 + srai t0, a2, 32 + min a5, a3, t1 + ld a3, 112(a0) + min a4, a5, t0 srai t1, a3, 32 + sext.w a2, a3 min t0, a4, a2 - ld a2, 160(a0) + ld a2, 120(a0) min a5, t0, t1 sext.w a3, a2 srai t1, a2, 32 min t0, a5, a3 - ld a3, 168(a0) + ld a3, 128(a0) min a4, t0, t1 sext.w a2, a3 srai t1, a3, 32 min t0, a4, a2 - ld a2, 176(a0) + ld a2, 136(a0) + min a5, t0, t1 + srai t2, a2, 32 + sext.w a4, a2 + min t0, a5, a4 + ld a4, 144(a0) + min a3, t0, t2 + sext.w t1, a4 + srai t0, a4, 32 + min a5, a3, t1 + ld a3, 152(a0) + min a2, a5, t0 + sext.w t1, a3 + srai t0, a3, 32 + min a5, a2, t1 + ld a2, 160(a0) + min a4, a5, t0 + srai t1, a2, 32 + sext.w a3, a2 + min t0, a4, a3 + ld a3, 168(a0) min a5, t0, t1 + sext.w a2, a3 + srai t1, a3, 32 + min a4, a5, a2 + ld a2, 176(a0) + min t0, a4, t1 sext.w a3, a2 srai t1, a2, 32 - min a4, a5, a3 + min a5, t0, a3 ld a3, 184(a0) - min t0, a4, t1 + min a4, a5, t1 sext.w a2, a3 srai t1, a3, 32 - min a5, t0, a2 + min t0, a4, a2 ld a2, 192(a0) - min a4, a5, t1 - sext.w t0, a2 + min a5, t0, t1 + sext.w a4, a2 srai t1, a2, 32 - min a5, a4, t0 + min t0, a5, a4 ld a4, 200(a0) - min a3, a5, t1 - sext.w t0, a4 + min a3, t0, t1 + sext.w t2, a4 + srai t0, a4, 32 + min a5, a3, t2 + ld a3, 208(a0) + min a2, a5, t0 + sext.w t1, a3 + srai t0, a3, 32 + min a5, a2, t1 + ld a2, 216(a0) + min a4, a5, t0 + srai t2, a2, 32 + sext.w t1, a2 + min a5, a4, t1 + ld a4, 224(a0) + min a3, a5, t2 srai t1, a4, 32 + sext.w t0, a4 min a5, a3, t0 - ld a3, 208(a0) + ld a3, 232(a0) min a2, a5, t1 sext.w t0, a3 srai t1, a3, 32 - min a5, a2, t0 - ld a2, 216(a0) - min a4, a5, t1 - sext.w a3, a2 - srai t1, a2, 32 - min t0, a4, a3 - ld a3, 224(a0) - min a5, t0, t1 - sext.w a2, a3 - srai t1, a3, 32 - min a4, a5, a2 - ld a2, 232(a0) - min t0, a4, t1 + min a4, a2, t0 + ld a2, 240(a0) + min a5, a4, t1 sext.w a3, a2 srai t1, a2, 32 - min a5, t0, a3 - ld a3, 240(a0) - min a4, a5, t1 - srai t0, a3, 32 + min t0, a5, a3 + ld a3, 248(a0) + min a4, t0, t1 sext.w t2, a3 - min a2, a4, t2 - ld a4, 248(a0) - min a5, a2, t0 - sext.w t1, a4 - srai t0, a4, 32 - min a3, a5, t1 - li a4, 960 - min a2, a3, t0 - blt a1, a4, label1383 - ld a3, 256(a0) srai t0, a3, 32 - sext.w a1, a3 - min a5, a2, a1 - ld a1, 264(a0) - min a4, a5, t0 - sext.w a2, a1 - srai t0, a1, 32 - min a5, a4, a2 - ld a2, 272(a0) - min a3, a5, t0 - sext.w a1, a2 - srai t0, a2, 32 - min a5, a3, a1 - ld a1, 280(a0) - min a4, a5, t0 - sext.w a3, a1 - srai t0, a1, 32 - min a5, a4, a3 - ld a3, 288(a0) + min a5, a4, t2 + li a3, 960 min a2, a5, t0 - sext.w t1, a3 - srai a5, a3, 32 - min a4, a2, t1 - ld a2, 296(a0) - min a1, a4, a5 - sext.w t0, a2 - srai a5, a2, 32 - min a4, a1, t0 - ld a1, 304(a0) - min a3, a4, a5 + blt a1, a3, label1598 + ld a1, 256(a0) srai t0, a1, 32 - sext.w a2, a1 - min a5, a3, a2 - ld a2, 312(a0) - min a4, a5, t0 + sext.w a5, a1 + min a3, a2, a5 + ld a2, 264(a0) + min a4, a3, t0 sext.w a1, a2 srai t0, a2, 32 min a5, a4, a1 - ld a1, 320(a0) + ld a1, 272(a0) min a3, a5, t0 sext.w a2, a1 srai t0, a1, 32 min a5, a3, a2 - ld a2, 328(a0) + ld a2, 280(a0) min a4, a5, t0 - srai t1, a2, 32 - sext.w a3, a2 - min a5, a4, a3 - ld a3, 336(a0) - min a1, a5, t1 - sext.w t0, a3 - srai t1, a3, 32 - min a4, a1, t0 - ld a1, 344(a0) - min a2, a4, t1 - srai t0, a1, 32 - sext.w a5, a1 - min a3, a2, a5 - ld a2, 352(a0) - min a4, a3, t0 sext.w a1, a2 srai t0, a2, 32 min a5, a4, a1 - ld a1, 360(a0) + ld a1, 288(a0) min a3, a5, t0 sext.w a2, a1 srai t0, a1, 32 min a5, a3, a2 - ld a2, 368(a0) + ld a2, 296(a0) min a4, a5, t0 sext.w a1, a2 srai t0, a2, 32 - min a5, a4, a1 - ld a1, 376(a0) - min a3, a5, t0 + min a3, a4, a1 + ld a1, 304(a0) + min a5, a3, t0 sext.w a2, a1 srai t0, a1, 32 - min a5, a3, a2 - ld a2, 384(a0) - min a4, a5, t0 + min a4, a5, a2 + ld a2, 312(a0) + min a3, a4, t0 sext.w a1, a2 srai t0, a2, 32 - min a5, a4, a1 - ld a1, 392(a0) - min a3, a5, t0 + min a5, a3, a1 + ld a1, 320(a0) + min a4, a5, t0 + srai t1, a1, 32 + sext.w a3, a1 + min a5, a4, a3 + ld a3, 328(a0) + min a2, a5, t1 + sext.w t0, a3 + srai t1, a3, 32 + min a4, a2, t0 + ld a2, 336(a0) + min a1, a4, t1 + srai t0, a2, 32 + sext.w a5, a2 + min a4, a1, a5 + ld a1, 344(a0) + min a3, a4, t0 sext.w a2, a1 srai t0, a1, 32 min a5, a3, a2 - ld a2, 400(a0) + ld a2, 352(a0) min a4, a5, t0 - srai t1, a2, 32 sext.w a1, a2 + srai t0, a2, 32 min a5, a4, a1 + ld a1, 360(a0) + min a3, a5, t0 + sext.w t1, a1 + srai t0, a1, 32 + min a4, a3, t1 + ld a3, 368(a0) + min a2, a4, t0 + sext.w a5, a3 + srai t0, a3, 32 + min a4, a2, a5 + ld a2, 376(a0) + min a1, a4, t0 + sext.w a5, a2 + srai t0, a2, 32 + min a4, a1, a5 + ld a1, 384(a0) + min a3, a4, t0 + sext.w a5, a1 + srai t0, a1, 32 + min a4, a3, a5 + ld a3, 392(a0) + min a2, a4, t0 + sext.w a5, a3 + srai t0, a3, 32 + min a4, a2, a5 + ld a2, 400(a0) + min a1, a4, t0 + sext.w a5, a2 + srai t0, a2, 32 + min a4, a1, a5 ld a1, 408(a0) - min a3, a5, t1 -pcrel2044: + min a3, a4, t0 +pcrel2262: auipc s7, %pcrel_hi(cmmc_parallel_body_payload_2) - sd s5, %pcrel_lo(pcrel2044)(s7) + sd s5, %pcrel_lo(pcrel2262)(s7) srai a0, a1, 32 - sext.w t0, a1 + sext.w a5, a1 li a1, 1000 - min a4, a3, t0 + min a4, a3, a5 min a2, a4, a0 mv a0, zero sw a2, 8(s8) @@ -362,1174 +377,1276 @@ pcrel2044: jal cmmcParallelFor li a0, 1000 addiw s9, s9, 1 - bge s9, a0, label1365 + bge s9, a0, label1580 add s5, s5, s0 mv a1, zero - lui a3, 524288 + lui a4, 524288 mv a0, s5 - addiw a2, a3, -1 - j label1360 -label1381: - li a0, 92 - jal _sysy_stoptime - mv a0, s6 - jal putint - mv a0, zero -label1354: - ld ra, 0(sp) - ld s5, 8(sp) - ld s0, 16(sp) - ld s7, 24(sp) - ld s8, 32(sp) - ld s1, 40(sp) - ld s6, 48(sp) - ld s9, 56(sp) - ld s2, 64(sp) - ld s3, 72(sp) - ld s4, 80(sp) - addi sp, sp, 88 - ret -label1365: + addiw a2, a4, -1 + j label1575 +label1580: mv a2, s1 mv a0, zero mv a1, s1 mv a4, zero - j label1369 + j label1584 .p2align 2 -label1372: +label1596: addi a1, a1, 64 .p2align 2 -label1369: +label1584: mul t0, a4, s0 - li t5, 375 - li t6, 875 addiw a4, a4, 16 add a3, s1, t0 sh2add a5, a0, a3 - add t3, a3, s4 + add t2, a3, s0 lw t1, 0(a5) - add a5, a3, s0 + sh2add a5, a0, t2 subw t0, zero, t1 - sh2add t1, a0, a5 + add t2, a3, s3 sw t0, 0(a1) - lw t0, 0(t1) - add t1, a3, s3 - subw t2, zero, t0 - sh2add a5, a0, t1 - sh2add t1, a0, t3 - sw t2, 4(a1) - lw t2, 0(a5) - subw t0, zero, t2 - sw t0, 8(a1) - lw a5, 0(t1) - li t1, 125 - subw t0, zero, a5 - slli t2, t1, 7 + lw t1, 0(a5) + sh2add a5, a0, t2 + subw t0, zero, t1 + sw t0, 4(a1) + add t0, a3, s4 + lw t3, 0(a5) + sh2add t2, a0, t0 + subw t1, zero, t3 + li t0, 125 + sw t1, 8(a1) + lw a5, 0(t2) + slli t2, t0, 7 + subw t1, zero, a5 add a5, a3, t2 - sw t0, 12(a1) - sh2add t0, a0, a5 - lw t1, 0(t0) - li t0, 625 - subw t3, zero, t1 - slli t1, t0, 5 + sh2add t3, a0, a5 + sw t1, 12(a1) + lw t1, 0(t3) + li t3, 625 + subw t0, zero, t1 + slli t1, t3, 5 add a5, a3, t1 - sw t3, 16(a1) + sw t0, 16(a1) sh2add t4, a0, a5 + li a5, 375 lw t0, 0(t4) subw t3, zero, t0 - slli t0, t5, 6 + slli t0, a5, 6 add t4, a3, t0 sw t3, 20(a1) - sh2add a5, a0, t4 - lw t3, 0(a5) - slli a5, t6, 5 - subw t5, zero, t3 - add t4, a3, a5 - sw t5, 24(a1) sh2add t5, a0, t4 - slli t4, t2, 1 - lw t3, 0(t5) - subw t6, zero, t3 - add t3, a3, t4 - sh2add t5, a0, t3 - sw t6, 28(a1) - lw t2, 0(t5) + lw a5, 0(t5) + li t5, 875 + subw t3, zero, a5 + slli a5, t5, 5 + add t4, a3, a5 + sw t3, 24(a1) + sh2add t3, a0, t4 + lw t6, 0(t3) + slli t3, t2, 1 + subw t5, zero, t6 + add t6, a3, t3 + sh2add t4, a0, t6 + sw t5, 28(a1) li t5, 1125 - subw t4, zero, t2 + lw t2, 0(t4) + subw t3, zero, t2 slli t2, t5, 5 - sw t4, 32(a1) add t4, a3, t2 + sw t3, 32(a1) + slli t2, t1, 1 sh2add t3, a0, t4 lw t5, 0(t3) - slli t3, t1, 1 - subw t2, zero, t5 - add t5, a3, t3 - li t3, 1375 - sw t2, 36(a1) - sh2add t2, a0, t5 - lw t4, 0(t2) - slli t2, t3, 5 - subw t1, zero, t4 + subw t6, zero, t5 + add t5, a3, t2 + sh2add t4, a0, t5 + sw t6, 36(a1) + lw t3, 0(t4) + li t4, 1375 + subw t1, zero, t3 + slli t3, t4, 5 + add t2, a3, t3 sw t1, 40(a1) - add t1, a3, t2 - sh2add t5, a0, t1 - lw t3, 0(t5) - slli t5, t0, 1 - subw t4, zero, t3 - add t2, a3, t5 sh2add t1, a0, t2 - sw t4, 44(a1) - li t4, 1625 - lw t3, 0(t1) - slli t2, t4, 5 + slli t2, t0, 1 + lw t4, 0(t1) + add t1, a3, t2 + subw t3, zero, t4 + li t2, 1625 + sh2add t4, a0, t1 + slli t1, t2, 5 + sw t3, 44(a1) + lw t3, 0(t4) subw t0, zero, t3 - sw t0, 48(a1) - add t0, a3, t2 - sh2add t1, a0, t0 - lw t3, 0(t1) - slli t1, a5, 1 - subw t2, zero, t3 add t3, a3, t1 + sh2add t4, a0, t3 + sw t0, 48(a1) + slli t3, a5, 1 + lw t0, 0(t4) + add t1, a3, t3 + li t4, 1875 + subw t2, zero, t0 + slli t3, t4, 5 + sh2add t0, a0, t1 sw t2, 52(a1) - sh2add t2, a0, t3 - li t3, 1875 - lw t0, 0(t2) + lw a5, 0(t0) + add t0, a3, t3 + subw t2, zero, a5 + sh2add t1, a0, t0 + sw t2, 56(a1) + lw a5, 0(t1) + subw t2, zero, a5 + li a5, 992 + sw t2, 60(a1) + blt a4, a5, label1596 + li t0, 125 + lui t2, 17 + slli a5, t0, 9 + add t1, a3, a5 + sh2add a4, a0, t1 + lw t0, 0(a4) + addiw a4, t2, -1632 subw a5, zero, t0 - slli t0, t3, 5 - add t2, a3, t0 - sw a5, 56(a1) - li t0, 992 - sh2add t1, a0, t2 - lw t3, 0(t1) - subw a5, zero, t3 - sw a5, 60(a1) - blt a4, t0, label1372 - li t2, 125 - slli a5, t2, 9 + li t2, 1125 + add t0, a3, a4 + sh2add t1, a0, t0 + sw a5, 64(a1) + lw a5, 0(t1) + subw a4, zero, a5 + slli a5, t2, 6 + lui t2, 19 add t1, a3, a5 + sw a4, 68(a1) sh2add t0, a0, t1 lw a4, 0(t0) - lui t0, 17 subw a5, zero, a4 - addiw a4, t0, -1632 - add t2, a3, a4 - sw a5, 64(a1) - sh2add t1, a0, t2 - li t2, 1125 - lw t0, 0(t1) - slli a4, t2, 6 - subw a5, zero, t0 - lui t2, 21 + addiw a4, t2, -1824 + li t2, 625 add t1, a3, a4 + sw a5, 72(a1) sh2add t0, a0, t1 - sw a5, 68(a1) - lui t1, 19 lw a5, 0(t0) - addiw t0, t1, -1824 subw a4, zero, a5 - add a5, a3, t0 - sw a4, 72(a1) - sh2add a4, a0, a5 - lw t1, 0(a4) - li a4, 625 - subw t0, zero, t1 - slli a5, a4, 7 - add t1, a3, a5 - sw t0, 76(a1) - sh2add t0, a0, t1 - lw a4, 0(t0) + slli a5, t2, 7 + add t0, a3, a5 + sw a4, 76(a1) + sh2add t1, a0, t0 + lui t0, 21 + lw a4, 0(t1) subw a5, zero, a4 - addiw a4, t2, -2016 - li t2, 1375 + addiw a4, t0, -2016 add t1, a3, a4 sw a5, 80(a1) + sh2add a5, a0, t1 + lw t0, 0(a5) + li a5, 1375 + subw a4, zero, t0 + slli t2, a5, 6 + add t1, a3, t2 + sw a4, 84(a1) sh2add t0, a0, t1 - slli t1, t2, 6 lw a5, 0(t0) - add t0, a3, t1 + lui t0, 22 subw a4, zero, a5 - sh2add a5, a0, t0 - sw a4, 84(a1) - lw a4, 0(a5) - lui a5, 22 - subw t1, zero, a4 - addiw t0, a5, 1888 - sw t1, 88(a1) - add t1, a3, t0 - sh2add a4, a0, t1 + addiw t1, t0, 1888 + add a5, a3, t1 + sw a4, 88(a1) + sh2add a4, a0, a5 addiw a0, a0, 1 - lw a5, 0(a4) + lw t0, 0(a4) li a4, 1000 - subw a3, zero, a5 + subw a3, zero, t0 sw a3, 92(a1) - bge a0, a4, label1879 + bge a0, a4, label1588 add a2, a2, s0 mv a4, zero mv a1, a2 - j label1369 -label1879: - auipc a1, %pcrel_hi(cmmc_parallel_body_payload_3) + j label1584 +label1588: + auipc a0, %pcrel_hi(cmmc_parallel_body_payload_3) mv s4, zero mv s6, zero - addi s3, a1, %pcrel_lo(label1879) + addi s3, a0, %pcrel_lo(label1588) .p2align 2 -label1376: +label1589: auipc s5, %pcrel_hi(cmmc_parallel_body_payload_3) mv a0, zero li a1, 1000 - sw s6, %pcrel_lo(label1376)(s5) + sw s6, %pcrel_lo(label1589)(s5) sw s6, 4(s3) sd s1, 8(s3) mv a2, s2 jal cmmcParallelFor li a0, 1000 addiw s4, s4, 1 - lw s6, %pcrel_lo(label1376)(s5) - bge s4, a0, label1381 + lw s6, %pcrel_lo(label1589)(s5) + bge s4, a0, label1593 add s1, s1, s0 - j label1376 + j label1589 +label1593: + li a0, 92 + jal _sysy_stoptime + mv a0, s6 + jal putint + mv a0, zero + j label1599 .p2align 2 cmmc_parallel_body_0: - addi sp, sp, -24 - mv t5, a1 -pcrel772: - auipc a4, %pcrel_hi(b) - li a5, 125 - sd s1, 0(sp) - addi a2, a4, %pcrel_lo(pcrel772) - slli a3, a5, 5 - sd s0, 8(sp) - sh2add t2, a3, a3 - sh1add t0, a3, a3 - slli a5, a3, 1 - mul a1, a0, a3 - sd s2, 16(sp) - slli t3, t0, 1 - slli t1, a5, 1 - add t4, a2, a1 -pcrel773: + addi sp, sp, -32 + mv t4, a1 +pcrel1081: + auipc a2, %pcrel_hi(b) +pcrel1082: auipc a1, %pcrel_hi(a) - addi a4, a1, %pcrel_lo(pcrel773) - mv a1, t4 - mv t6, zero - j label5 + sd s0, 0(sp) + addi t3, a2, %pcrel_lo(pcrel1081) + addi t5, a1, %pcrel_lo(pcrel1082) + li a2, 125 + sd s1, 8(sp) + slli a3, a2, 5 + sd s2, 16(sp) + sh2add t1, a3, a3 + sh1add a5, a3, a3 + slli a4, a3, 1 + sd s3, 24(sp) + slli t2, a5, 1 + slli t0, a4, 1 + j label2 .p2align 2 -label9: +label8: li a6, 125 - lui s0, 63 + lui s2, 63 slli t6, a6, 11 + add a7, a1, t6 + sh2add s0, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, 1952 - lui s0, 64 - add a7, a2, a6 - sw t6, 256(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, 1856 - lui s0, 66 - add a7, a2, t6 - sw a6, 260(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 65 - sw t6, 264(a1) - addiw t6, a6, 1760 + lw a6, 0(s0) + addiw t6, s2, 1952 + sh2add s1, a0, a7 + lui s2, 64 + add a7, a1, t6 + sw a6, 0(s1) + sh2add s0, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, 1664 - lui s0, 67 - add a7, a2, a6 - sw t6, 268(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, 1568 + lw a6, 0(s0) + addiw t6, s2, 1856 + sh2add s1, a0, a7 + lui s2, 65 + add a7, a1, t6 + sw a6, 0(s1) + sh2add s0, a0, a7 add a7, a2, t6 - sw a6, 272(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 68 - addiw a7, a6, 1472 - sw t6, 276(a1) - add t6, a2, a7 - sh2add a6, a0, t6 - lui t6, 69 - lw a7, 0(a6) - addiw a6, t6, 1376 - sw a7, 280(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - li a7, 1125 - lw a6, 0(t6) - slli t6, a7, 8 - add s0, a2, t6 - sw a6, 284(a1) + lw a6, 0(s0) + addiw t6, s2, 1760 + sh2add s1, a0, a7 + lui s2, 66 + add a7, a1, t6 + sw a6, 0(s1) + sh2add s0, a0, a7 + add s1, a2, t6 + lw a6, 0(s0) + addiw t6, s2, 1664 + sh2add a7, a0, s1 + lui s2, 67 + add s0, a1, t6 + add s1, a2, t6 + sw a6, 0(a7) + addiw t6, s2, 1568 sh2add a7, a0, s0 + lui s2, 68 + lw a6, 0(a7) + sh2add a7, a0, s1 + add s1, a2, t6 + sw a6, 0(a7) + add a7, a1, t6 + addiw t6, s2, 1472 + sh2add s0, a0, a7 + li s2, 1125 + sh2add a7, a0, s1 + lw a6, 0(s0) + add s1, a2, t6 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s0, a0, a7 + sh2add a7, a0, s1 + lw a6, 0(s0) + lui s0, 69 + addiw t6, s0, 1376 + sw a6, 0(a7) + add s0, a2, t6 + add s1, a1, t6 + slli t6, s2, 8 + sh2add a7, a0, s1 + lui s2, 73 lw a6, 0(a7) + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 + add a7, a2, t6 + lw a6, 0(s1) + sh2add s0, a0, a7 lui a7, 71 addiw t6, a7, 1184 - sw a6, 288(a1) + sw a6, 0(s0) + add s1, a1, t6 add s0, a2, t6 - lui t6, 72 - sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, t6, 1088 - sw a7, 292(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - lui a7, 73 - lw a6, 0(t6) - addiw t6, a7, 992 - add s0, a2, t6 - sw a6, 296(a1) - lui t6, 74 - sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, t6, 896 - sw a7, 300(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - lui a7, 75 - lw a6, 0(t6) - addiw t6, a7, 800 - add s0, a2, t6 - sw a6, 304(a1) + sh2add a7, a0, s1 + lui s1, 72 + lw a6, 0(a7) + addiw t6, s1, 1088 + sh2add a7, a0, s0 + add s1, a2, t6 + sw a6, 0(a7) + add a7, a1, t6 + addiw t6, s2, 992 + sh2add s0, a0, a7 + lui s2, 75 + sh2add a7, a0, s1 + lw a6, 0(s0) + add s1, a2, t6 + add s0, a1, t6 + sw a6, 0(a7) sh2add a7, a0, s0 + lui s0, 74 lw a6, 0(a7) + addiw t6, s0, 896 + sh2add a7, a0, s1 + add s0, a2, t6 + add s1, a1, t6 + sw a6, 0(a7) + addiw t6, s2, 800 + sh2add a7, a0, s1 + lui s2, 77 + lw a6, 0(a7) + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 + add a7, a2, t6 + lw a6, 0(s1) + sh2add s0, a0, a7 lui a7, 76 addiw t6, a7, 704 - sw a6, 308(a1) + sw a6, 0(s0) + add s1, a1, t6 add s0, a2, t6 - lui t6, 77 - sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, t6, 608 - sw a7, 312(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - li a7, 625 - lw a6, 0(t6) - slli t6, a7, 9 - add s0, a2, t6 - sw a6, 316(a1) - lui t6, 79 - sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, t6, 416 - sw a7, 320(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - lui a7, 80 - lw a6, 0(t6) - addiw t6, a7, 320 + sh2add a7, a0, s1 + addiw t6, s2, 608 + lw a6, 0(a7) + li s2, 625 + add s1, a1, t6 + sh2add a7, a0, s0 add s0, a2, t6 - sw a6, 324(a1) + sw a6, 0(a7) + slli t6, s2, 9 + sh2add a7, a0, s1 + lui s2, 79 + add s1, a1, t6 + lw a6, 0(a7) sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, 416 + sh2add a7, a0, s1 + lui s2, 80 + add s1, a1, t6 lw a6, 0(a7) - lui a7, 81 - addiw t6, a7, 224 - sw a6, 328(a1) + sh2add a7, a0, s0 add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, 320 + sh2add a7, a0, s1 + lui s2, 81 + add s1, a1, t6 + lw a6, 0(a7) sh2add a7, a0, s0 - lui s0, 82 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, 224 + sh2add a7, a0, s1 + lui s2, 83 lw a6, 0(a7) - addiw t6, s0, 128 - lui s0, 83 - add a7, a2, t6 - sw a6, 332(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, 32 - lui s0, 84 - add a7, a2, a6 - sw t6, 336(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -64 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sw a6, 340(a1) - sh2add a6, a0, a7 - lui a7, 85 - lw t6, 0(a6) - addiw a6, a7, -160 - sw t6, 344(a1) - add t6, a2, a6 - li a6, 1375 - sh2add s0, a0, t6 - slli t6, a6, 8 - lw a7, 0(s0) + lw a6, 0(s1) + sh2add s0, a0, a7 + lui a7, 82 + addiw t6, a7, 128 + sw a6, 0(s0) + add s1, a1, t6 add s0, a2, t6 - sw a7, 348(a1) - sh2add a7, a0, s0 - lui s0, 87 + sh2add a7, a0, s1 + addiw t6, s2, 32 lw a6, 0(a7) - addiw t6, s0, -352 - lui s0, 88 - add a7, a2, t6 - sw a6, 352(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, -448 - lui s0, 89 - add a7, a2, a6 - sw t6, 356(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -544 - lui s0, 91 - add a7, a2, t6 - sw a6, 360(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 90 - sw t6, 364(a1) - addiw t6, a6, -640 - add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, -736 - lui s0, 92 - add a7, a2, a6 - sw t6, 368(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -832 - li s0, 375 - add a7, a2, t6 - sw a6, 372(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 93 - sw t6, 376(a1) - addiw t6, a6, -928 + lui s2, 85 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - slli a6, s0, 10 - lui s0, 95 - add a7, a2, a6 - sw t6, 380(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -1120 - lui s0, 96 + lw a6, 0(s1) + sh2add s0, a0, a7 + lui a7, 84 + addiw t6, a7, -64 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -160 + lw a6, 0(a7) + lui s2, 87 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sw a6, 384(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, -1216 - lui s0, 97 - add a7, a2, a6 - sw t6, 388(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -1312 + lw a6, 0(s1) + sh2add s0, a0, a7 + li a7, 1375 + slli t6, a7, 8 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -352 + lw a6, 0(a7) + lui s2, 89 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sw a6, 392(a1) - sh2add a6, a0, a7 - lui a7, 98 - lw t6, 0(a6) - addiw a6, a7, -1408 - add s0, a2, a6 - sw t6, 396(a1) - lui a6, 99 - sh2add t6, a0, s0 - lw a7, 0(t6) - addiw t6, a6, -1504 - sw a7, 400(a1) + lw a6, 0(s1) + sh2add s0, a0, a7 + lui a7, 88 + addiw t6, a7, -448 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -544 + lw a6, 0(a7) + lui s2, 90 + add s1, a1, t6 + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, -640 + sh2add a7, a0, s1 + lui s2, 91 + add s1, a1, t6 + lw a6, 0(a7) + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, -736 + sh2add a7, a0, s1 + lui s2, 93 + lw a6, 0(a7) + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 100 - sw t6, 404(a1) - addiw t6, a6, -1600 + lw a6, 0(s1) + sh2add s0, a0, a7 + lui a7, 92 + addiw t6, a7, -832 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -928 + lw a6, 0(a7) + lui s2, 95 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lui a7, 101 - lw t6, 0(a6) - addiw a6, a7, -1696 - sw t6, 408(a1) - add t6, a2, a6 - sh2add a7, a0, t6 + lw a6, 0(s1) + sh2add s0, a0, a7 + li a7, 375 + slli t6, a7, 10 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -1120 + lw a6, 0(a7) + lui s2, 96 + add s1, a1, t6 + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, -1216 + sh2add a7, a0, s1 + lui s2, 99 + add s1, a1, t6 + lw a6, 0(a7) + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + sh2add a7, a0, s1 + lui s1, 97 + lw a6, 0(a7) + addiw t6, s1, -1312 + sh2add a7, a0, s0 + add s1, a2, t6 + add s0, a1, t6 + sw a6, 0(a7) + sh2add a7, a0, s0 + lui s0, 98 + lw a6, 0(a7) + addiw t6, s0, -1408 + sh2add a7, a0, s1 + add s0, a2, t6 + add s1, a1, t6 + sw a6, 0(a7) + addiw t6, s2, -1504 + sh2add a7, a0, s1 + lui s2, 100 + add s1, a1, t6 + lw a6, 0(a7) + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, -1600 + sh2add a7, a0, s1 + add s1, a1, t6 + lw a6, 0(a7) + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + sh2add a7, a0, s1 + lui s1, 101 + lw a6, 0(a7) + addiw t6, s1, -1696 + sh2add a7, a0, s0 + add s1, a2, t6 + add s0, a1, t6 + sw a6, 0(a7) + sh2add a1, a0, s1 + sh2add a7, a0, s0 addiw a0, a0, 1 lw a6, 0(a7) - sw a6, 412(a1) - ble t5, a0, label11 - add t4, t4, a3 + sw a6, 0(a1) + ble t4, a0, label9 +.p2align 2 +label2: + mv a1, t5 mv t6, zero - mv a1, t4 .p2align 2 -label5: - mul a6, t6, a3 - li s2, 875 +label4: + mul a7, t6, a3 addiw t6, t6, 64 - add a2, a4, a6 - sh2add a7, a0, a2 - add a6, a2, a3 - sh2add s0, a0, a6 - add a6, a2, a5 - lw s1, 0(a7) - sw s1, 0(a1) - lw a7, 0(s0) - sw a7, 4(a1) - sh2add a7, a0, a6 - add a6, a2, t0 - lw s0, 0(a7) - sh2add a7, a0, a6 - sw s0, 8(a1) + add a2, t3, a7 + sh2add a6, a0, a2 + add s1, a2, a3 + sh2add a7, a0, a1 lw s0, 0(a7) - add a7, a2, t1 - sh2add a6, a0, a7 - sw s0, 12(a1) - add a7, a2, t2 - lw s0, 0(a6) - sh2add s1, a0, a7 - sw s0, 16(a1) - add s0, a2, t3 - lw a6, 0(s1) - sh2add a7, a0, s0 - li s1, 875 - sw a6, 20(a1) - lw a6, 0(a7) - slli a7, s1, 5 - li s1, 1125 - sw a6, 24(a1) - add a6, a2, a7 + sh2add a7, a0, s1 + add s1, a2, a4 + sw s0, 0(a6) + add a6, a1, a3 sh2add s0, a0, a6 - li a6, 125 + sh2add a6, a0, s1 + lw s2, 0(s0) + sw s2, 0(a7) + add s2, a1, a4 + sh2add s0, a0, s2 + add s2, a2, a5 lw a7, 0(s0) - sw a7, 28(a1) - slli a7, a6, 8 + add s0, a1, a5 + sh2add s1, a0, s0 + sw a7, 0(a6) + add s0, a2, t0 + lw a6, 0(s1) + sh2add a7, a0, s2 + add s2, a2, t1 + sw a6, 0(a7) + add a7, a1, t0 + sh2add a6, a0, s0 + sh2add s1, a0, a7 + sh2add a7, a0, s2 + lw s0, 0(s1) + li s2, 875 + sw s0, 0(a6) + add s0, a2, t2 + add a6, a1, t1 + sh2add s3, a0, a6 + sh2add a6, a0, s0 + lw s1, 0(s3) + sw s1, 0(a7) + add s1, a1, t2 + sh2add a7, a0, s1 + lw s0, 0(a7) + slli a7, s2, 5 + add s1, a2, a7 + sw s0, 0(a6) + add s0, a1, a7 + sh2add a6, a0, s1 + sh2add s2, a0, s0 + li s1, 125 + lw a7, 0(s2) + sw a7, 0(a6) + slli a7, s1, 8 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - sw a7, 32(a1) + li s1, 1125 + lw a7, 0(s2) + sw a7, 0(a6) slli a7, s1, 5 - add a6, a2, a7 - sh2add s0, a0, a6 - li a6, 625 - lw a7, 0(s0) - sw a7, 36(a1) - slli a7, a6, 6 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - li a6, 1375 - slli s1, a6, 5 - sw a7, 40(a1) - add a7, a2, s1 - li s1, 375 - sh2add a6, a0, a7 - lw s0, 0(a6) - slli a6, s1, 7 - li s1, 1625 - add a7, a2, a6 - sw s0, 44(a1) - sh2add s0, a0, a7 - lw a6, 0(s0) - sw a6, 48(a1) - slli a6, s1, 5 - add a7, a2, a6 - sh2add s0, a0, a7 - slli a7, s2, 6 - lw a6, 0(s0) - li s2, 125 + lw a7, 0(s2) + li s0, 625 + sw a7, 0(a6) + slli a7, s0, 6 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + li s0, 1375 + lw a7, 0(s1) + sw a7, 0(a6) + slli a7, s0, 5 + add s0, a1, a7 add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + li s0, 375 + lw a7, 0(s2) + sw a7, 0(a6) + slli a7, s0, 7 + add s1, a1, a7 + add s2, a2, a7 sh2add s0, a0, s1 - sw a6, 52(a1) - li s1, 1875 - lw a6, 0(s0) + sh2add a6, a0, s2 + li s1, 1625 + lw a7, 0(s0) + sw a7, 0(a6) slli a7, s1, 5 - sw a6, 56(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - slli a6, s2, 9 + add s2, a1, a7 + add s0, a2, a7 + sh2add s1, a0, s2 + sh2add a6, a0, s0 + lw a7, 0(s1) + li s0, 875 + sw a7, 0(a6) + slli a7, s0, 6 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + li s0, 1875 + lw a7, 0(s1) + sw a7, 0(a6) + slli a7, s0, 5 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lw a7, 0(s0) - lui s2, 17 - add s1, a2, a6 + li s2, 125 + sw a7, 0(a6) + slli a7, s2, 9 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + lui s1, 17 + sw a7, 0(a6) + addiw a7, s1, -1632 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + li s0, 1125 + lw a7, 0(s1) + sw a7, 0(a6) + slli a7, s0, 6 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lui s0, 19 + lw a7, 0(s2) + sw a7, 0(a6) + addiw a7, s0, -1824 + add s1, a1, a7 + add s2, a2, a7 sh2add s0, a0, s1 - sw a7, 60(a1) - addiw s1, s2, -1632 + sh2add a6, a0, s2 + li s1, 625 lw a7, 0(s0) - lui s2, 26 - add a6, a2, s1 - sw a7, 64(a1) - sh2add a7, a0, a6 - lw s0, 0(a7) - li a7, 1125 - slli a6, a7, 6 - sw s0, 68(a1) - add s0, a2, a6 - sh2add a7, a0, s0 - lw a6, 0(a7) - lui a7, 19 - sw a6, 72(a1) - addiw a6, a7, -1824 - add s0, a2, a6 - sh2add a7, a0, s0 - lw a6, 0(a7) - li a7, 625 - slli s1, a7, 7 - sw a6, 76(a1) - add a6, a2, s1 - sh2add a7, a0, a6 - lw s0, 0(a7) - lui a7, 21 - addiw a6, a7, -2016 - sw s0, 80(a1) - add s0, a2, a6 - li a6, 1375 + sw a7, 0(a6) + slli a7, s1, 7 + add s0, a1, a7 + add s2, a2, a7 sh2add s1, a0, s0 + sh2add a6, a0, s2 + lui s0, 21 lw a7, 0(s1) - li s1, 375 - sw a7, 84(a1) - slli a7, a6, 6 - add s0, a2, a7 - sh2add a6, a0, s0 - lw a7, 0(a6) - lui a6, 22 - sw a7, 88(a1) - addiw a7, a6, 1888 + sw a7, 0(a6) + addiw a7, s0, -2016 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + li s2, 1375 + lw a7, 0(s0) + sw a7, 0(a6) + slli a7, s2, 6 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + lui s1, 22 + sw a7, 0(a6) + addiw a7, s1, 1888 + add s2, a1, a7 add s0, a2, a7 + sh2add s1, a0, s2 sh2add a6, a0, s0 - lw a7, 0(a6) - slli a6, s1, 8 - lui s1, 24 - add s0, a2, a6 - sw a7, 92(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - sw a6, 96(a1) - addiw a6, s1, 1696 - add a7, a2, a6 - sh2add s0, a0, a7 - li a7, 1625 - lw a6, 0(s0) - sw a6, 100(a1) - slli a6, a7, 6 - add s0, a2, a6 - addiw a6, s2, 1504 + lw a7, 0(s1) + li s0, 375 + sw a7, 0(a6) + slli a7, s0, 8 + add s0, a1, a7 + add s2, a2, a7 sh2add s1, a0, s0 - li s2, 125 - add s0, a2, a6 + sh2add a6, a0, s2 + lui s0, 24 lw a7, 0(s1) - sw a7, 104(a1) - sh2add a7, a0, s0 - li s0, 875 - lw a6, 0(a7) - slli s1, s0, 7 - add a7, a2, s1 - lui s1, 28 - sw a6, 108(a1) - sh2add a6, a0, a7 - lw s0, 0(a6) - addiw a6, s1, 1312 - li s1, 1875 - add a7, a2, a6 - sw s0, 112(a1) - sh2add s0, a0, a7 - slli a7, s1, 6 - lw a6, 0(s0) + sw a7, 0(a6) + addiw a7, s0, 1696 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + li s0, 1625 + lw a7, 0(s2) + sw a7, 0(a6) + slli a7, s0, 6 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lui s0, 26 + lw a7, 0(s2) + sw a7, 0(a6) + addiw a7, s0, 1504 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + li s1, 875 + sw a7, 0(a6) + slli a7, s1, 7 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + lui s0, 28 + lw a7, 0(s1) + sw a7, 0(a6) + addiw a7, s0, 1312 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + li s0, 1875 + lw a7, 0(s1) + sw a7, 0(a6) + slli a7, s0, 6 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 30 + lw a7, 0(s0) + sw a7, 0(a6) + addiw a7, s1, 1120 + add s2, a1, a7 add s0, a2, a7 - sw a6, 116(a1) + sh2add s1, a0, s2 sh2add a6, a0, s0 - addiw s0, s1, 1120 - lw a7, 0(a6) - add a6, a2, s0 - sw a7, 120(a1) - sh2add a7, a0, a6 - lw s1, 0(a7) - slli a7, s2, 10 - li s2, 1125 - add a6, a2, a7 - sw s1, 124(a1) - sh2add s0, a0, a6 + lw a7, 0(s1) + li s0, 125 + sw a7, 0(a6) + slli a7, s0, 10 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 32 lw a7, 0(s0) - addiw a6, s1, 928 + sw a7, 0(a6) + addiw a7, s1, 928 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 33 - sw a7, 128(a1) - add a7, a2, a6 - sh2add s0, a0, a7 - lw a6, 0(s0) - sw a6, 132(a1) - addiw a6, s1, 832 - add a7, a2, a6 - sh2add s0, a0, a7 - lui a7, 34 - lw a6, 0(s0) - sw a6, 136(a1) - addiw a6, a7, 736 - add s1, a2, a6 - slli a6, s2, 7 + lw a7, 0(s0) + sw a7, 0(a6) + addiw a7, s1, 832 + add s1, a1, a7 + add s2, a2, a7 sh2add s0, a0, s1 - lui s2, 60 - add s1, a2, a6 + sh2add a6, a0, s2 + lui s1, 34 lw a7, 0(s0) + sw a7, 0(a6) + addiw a7, s1, 736 + add s1, a1, a7 + add s2, a2, a7 sh2add s0, a0, s1 - lui s1, 36 - sw a7, 140(a1) - addiw a6, s1, 544 + sh2add a6, a0, s2 + li s1, 1125 lw a7, 0(s0) - lui s1, 37 - sw a7, 144(a1) - add a7, a2, a6 - sh2add s0, a0, a7 - lw a6, 0(s0) - sw a6, 148(a1) - addiw a6, s1, 448 + sw a7, 0(a6) + slli a7, s1, 7 + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lui s1, 36 + lw a7, 0(s2) + sw a7, 0(a6) + addiw a7, s1, 544 + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lw a7, 0(s2) + lui s0, 37 + sw a7, 0(a6) + addiw a7, s0, 448 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 38 - add a7, a2, a6 - sh2add s0, a0, a7 + lw a7, 0(s0) + sw a7, 0(a6) addiw a7, s1, 352 - lw a6, 0(s0) - li s1, 625 - sw a6, 152(a1) - add a6, a2, a7 - sh2add s0, a0, a6 + add s2, a1, a7 + add s0, a2, a7 + sh2add s1, a0, s2 + sh2add a6, a0, s0 + lw a7, 0(s1) + li s0, 625 + sw a7, 0(a6) + slli a7, s0, 8 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 lw a7, 0(s0) - sw a7, 156(a1) - slli a7, s1, 8 lui s1, 40 - add a6, a2, a7 - sh2add s0, a0, a6 - lw a7, 0(s0) - sw a7, 160(a1) + sw a7, 0(a6) addiw a7, s1, 160 - lui s1, 41 - add a6, a2, a7 - sh2add s0, a0, a6 - addiw a6, s1, 64 + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lw a7, 0(s2) + lui s0, 41 + sw a7, 0(a6) + addiw a7, s0, 64 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + lw a7, 0(s1) + lui s2, 42 + sw a7, 0(a6) + addiw a7, s2, -32 + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lw a7, 0(s2) + li s0, 1375 + sw a7, 0(a6) + slli a7, s0, 7 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lw a7, 0(s0) - lui s1, 42 - add s0, a2, a6 - sw a7, 164(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - sw a6, 168(a1) - addiw a6, s1, -32 - li s1, 1375 - add a7, a2, a6 - sh2add s0, a0, a7 - lw a6, 0(s0) - sw a6, 172(a1) - slli a6, s1, 7 - lui s1, 44 - add a7, a2, a6 - sh2add s0, a0, a7 - addiw a7, s1, -224 - lw a6, 0(s0) - lui s1, 46 - sw a6, 176(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - lui a6, 45 + lui s2, 44 + sw a7, 0(a6) + addiw a7, s2, -224 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 lw a7, 0(s0) - sw a7, 180(a1) - addiw a7, a6, -320 + lui s1, 45 + sw a7, 0(a6) + addiw a7, s1, -320 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + lw a7, 0(s1) + lui s2, 46 + sw a7, 0(a6) + addiw a7, s2, -416 + add s2, a1, a7 add s0, a2, a7 + sh2add s1, a0, s2 sh2add a6, a0, s0 - lw a7, 0(a6) - sw a7, 184(a1) - addiw a7, s1, -416 - lui s1, 48 - add a6, a2, a7 - sh2add s0, a0, a6 - li a6, 375 + lw a7, 0(s1) + li s0, 375 + sw a7, 0(a6) + slli a7, s0, 9 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lw a7, 0(s0) - sw a7, 188(a1) - slli a7, a6, 9 + lui s2, 48 + sw a7, 0(a6) + addiw a7, s2, -608 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lw a7, 0(s2) + lui s1, 49 + sw a7, 0(a6) + addiw a7, s1, -704 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, s1, -608 lui s1, 50 - add s0, a2, a6 - sw a7, 192(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - lui a7, 49 - sw a6, 196(a1) - addiw a6, a7, -704 - add s0, a2, a6 - sh2add a7, a0, s0 - lw a6, 0(a7) + lw a7, 0(s2) + sw a7, 0(a6) addiw a7, s1, -800 - li s1, 1625 - sw a6, 200(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - slli a6, s1, 7 - lw a7, 0(s0) + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lw a7, 0(s2) + li s0, 1625 + sw a7, 0(a6) + slli a7, s0, 7 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 52 - add s0, a2, a6 - sw a7, 204(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - sw a6, 208(a1) - addiw a6, s1, -992 - lui s1, 53 - add a7, a2, a6 - sh2add s0, a0, a7 - addiw a7, s1, -1088 - lw a6, 0(s0) - lui s1, 54 - sw a6, 212(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - addiw a6, s1, -1184 lw a7, 0(s0) - li s1, 875 - sw a7, 216(a1) - add a7, a2, a6 - sh2add s0, a0, a7 - slli a7, s1, 8 - lw a6, 0(s0) - lui s1, 57 + sw a7, 0(a6) + addiw a7, s1, -992 + add s2, a1, a7 add s0, a2, a7 - sw a6, 220(a1) + sh2add s1, a0, s2 sh2add a6, a0, s0 - lw a7, 0(a6) - lui a6, 56 - sw a7, 224(a1) - addiw a7, a6, -1376 + lw a7, 0(s1) + lui s0, 53 + sw a7, 0(a6) + addiw a7, s0, -1088 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lui s2, 54 + lw a7, 0(s0) + sw a7, 0(a6) + addiw a7, s2, -1184 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lw a7, 0(s2) + li s1, 875 + sw a7, 0(a6) + slli a7, s1, 8 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - sw a7, 228(a1) + lw a7, 0(s2) + lui s0, 56 + sw a7, 0(a6) + addiw a7, s0, -1376 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lw a7, 0(s2) + lui s1, 57 + sw a7, 0(a6) addiw a7, s1, -1472 - li s1, 1875 - add a6, a2, a7 - sh2add s0, a0, a6 - lui a6, 58 - lw a7, 0(s0) - sw a7, 232(a1) - addiw a7, a6, -1568 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - slli a6, s1, 7 - sw a7, 236(a1) - add a7, a2, a6 - sh2add s0, a0, a7 - addiw a7, s2, -1760 - lw a6, 0(s0) - lui s2, 61 - add s0, a2, a7 - addiw a7, s2, -1856 + lw a7, 0(s2) + lui s0, 58 + sw a7, 0(a6) + addiw a7, s0, -1568 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + li s1, 1875 + sw a7, 0(a6) + slli a7, s1, 7 + add s0, a1, a7 + add s2, a2, a7 sh2add s1, a0, s0 - sw a6, 240(a1) - lw a6, 0(s1) + sh2add a6, a0, s2 + lui s0, 60 + lw a7, 0(s1) + sw a7, 0(a6) + addiw a7, s0, -1760 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + lui s1, 61 + sw a7, 0(a6) + addiw a7, s1, -1856 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 62 - sw a6, 244(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - addiw a6, s1, -1952 lw a7, 0(s0) - add s0, a2, a6 - sw a7, 248(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - li a7, 960 - sw a6, 252(a1) - bge t6, a7, label9 - addi a1, a1, 256 - j label5 -label11: - ld s1, 0(sp) - ld s0, 8(sp) + sw a7, 0(a6) + addiw a7, s1, -1952 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + lw a7, 0(s1) + sw a7, 0(a6) + li a6, 960 + bge t6, a6, label8 + li a6, 125 + slli a2, a6, 11 + add a1, a1, a2 + j label4 +label9: + ld s0, 0(sp) + ld s1, 8(sp) ld s2, 16(sp) - addi sp, sp, 24 + ld s3, 24(sp) + addi sp, sp, 32 ret .p2align 2 cmmc_parallel_body_1: - addi sp, sp, -80 - mv t4, a1 -pcrel1086: + addi sp, sp, -56 + mv t1, a1 +pcrel1303: auipc a4, %pcrel_hi(c) li a5, 125 - mv t6, a0 - addi a3, a4, %pcrel_lo(pcrel1086) - slli a2, a5, 5 + mv t4, a0 + addi a2, a4, %pcrel_lo(pcrel1303) + slli a3, a5, 5 sd s0, 0(sp) - sh2add t1, a2, a2 - sh1add a5, a2, a2 - slli a4, a2, 1 - mul a1, a0, a2 + li a5, 1000 + mul a1, a0, a3 sd s5, 8(sp) - slli t0, a4, 1 -pcrel1087: - auipc a0, %pcrel_hi(a) - add t3, a3, a1 - addi t5, a0, %pcrel_lo(pcrel1087) +pcrel1304: + auipc a0, %pcrel_hi(b) + add a4, a2, a1 sd s1, 16(sp) -pcrel1088: - auipc a1, %pcrel_hi(b) + addi t2, a0, %pcrel_lo(pcrel1304) +pcrel1305: + auipc a1, %pcrel_hi(a) sd s6, 24(sp) - addi a3, a1, %pcrel_lo(pcrel1088) - sd s3, 32(sp) - sd s4, 40(sp) + addi t3, a1, %pcrel_lo(pcrel1305) + sd s4, 32(sp) + li a1, 992 + sd s3, 40(sp) sd s2, 48(sp) - sd s7, 56(sp) - sd s9, 64(sp) - sd s8, 72(sp) - mul a1, t6, a2 - mv a6, t3 - mv a0, zero - add t2, t5, a1 - mv a1, t2 - mv s0, zero - mv s1, zero - j label781 + mul a0, t4, a3 + mv a2, t2 + mv t5, zero + add t0, t3, a0 + mv a0, t0 + mv a6, zero + mv a7, zero + j label1090 .p2align 2 -label786: - li s0, 125 - lui s6, 17 - slli s4, s0, 9 - lw s0, 64(a1) - add s2, a7, s4 - sh2add s3, a0, s2 - lw s4, 0(s3) - mulw s5, s0, s4 - addiw s4, s6, -1632 - addw s2, s1, s5 - li s6, 1125 - add s0, a7, s4 - lw s1, 68(a1) - sh2add s3, a0, s0 - lw s4, 0(s3) - mulw s5, s1, s4 - slli s4, s6, 6 - addw s0, s2, s5 - lui s6, 19 - add s1, a7, s4 - lw s2, 72(a1) - sh2add s3, a0, s1 - lw s4, 0(s3) - mulw s5, s2, s4 - lw s2, 76(a1) - addiw s4, s6, -1824 - addw s1, s0, s5 - li s6, 625 - add s0, a7, s4 - sh2add s3, a0, s0 - lw s5, 0(s3) +label1095: + lw a6, 64(a0) + lw s3, 64(t6) + lw s1, 68(a0) + mulw s2, a6, s3 + lw s3, 68(t6) + addw s0, a7, s2 + lw s2, 72(a0) + mulw a7, s1, s3 + lw s3, 72(t6) + addw a6, s0, a7 + lw s0, 76(a0) + mulw s1, s2, s3 + lw s2, 76(t6) + addw a7, a6, s1 + lw s1, 80(a0) + lw s3, 80(t6) + mulw s4, s0, s2 + lw s0, 84(a0) + addw a6, a7, s4 + mulw s2, s1, s3 + lw s3, 84(t6) + addw a7, a6, s2 + lw s1, 88(a0) + mulw s2, s0, s3 + lw s3, 88(t6) + addw a6, a7, s2 + lw s0, 92(a0) + mulw s2, s1, s3 + lw s1, 92(t6) + addw a7, a6, s2 + sh2add t6, t5, a4 + addiw t5, t5, 1 + mulw a6, s0, s1 + addw a0, a7, a6 + sw a0, 0(t6) + bge t5, a5, label1266 + add a2, a2, a3 + mv a0, t0 + mv a6, zero + mv a7, zero +.p2align 2 +label1090: + sh2add t6, a6, a2 + lw s0, 0(a0) + addiw a6, a6, 16 + lw s1, 0(t6) + lw s4, 4(a0) + lw s5, 4(t6) + mulw s6, s0, s1 + mulw s3, s4, s5 + lw s4, 8(a0) + addw s2, s3, s6 + lw s5, 8(t6) + lw s3, 12(a0) + mulw s1, s4, s5 + lw s5, 12(t6) + addw s0, s2, s1 + lw s2, 16(a0) + mulw s4, s3, s5 + lw s5, 16(t6) + addw s1, s0, s4 + lw s3, 20(a0) mulw s4, s2, s5 - lw s2, 80(a1) - slli s5, s6, 7 + lw s5, 20(t6) addw s0, s1, s4 - lui s6, 21 - add s1, a7, s5 - sh2add s3, a0, s1 - lw s4, 0(s3) + lw s4, 24(a0) + mulw s1, s3, s5 + lw s5, 24(t6) + addw s2, s0, s1 + lw s3, 28(a0) + mulw s0, s4, s5 + lw s4, 28(t6) + addw s1, s2, s0 + lw s2, 32(a0) + mulw s5, s3, s4 + lw s4, 32(t6) + addw s0, s1, s5 + lw s3, 36(a0) + lw s6, 36(t6) mulw s5, s2, s4 - lw s2, 84(a1) - addiw s4, s6, -2016 + lw s2, 40(a0) addw s1, s0, s5 - li s6, 1375 - add s0, a7, s4 - sh2add s3, a0, s0 - lw s4, 0(s3) - mulw s5, s2, s4 - lw s2, 88(a1) - slli s4, s6, 6 - addw s0, s1, s5 - lui s6, 22 - add s1, a7, s4 - sh2add s3, a0, s1 - lw s5, 0(s3) + mulw s4, s3, s6 + lw s5, 40(t6) + addw s0, s1, s4 + lw s3, 44(a0) mulw s4, s2, s5 - addiw s2, s6, 1888 + lw s5, 44(t6) addw s1, s0, s4 - add s3, a7, s2 - lw a7, 92(a1) - sh2add s0, a0, s3 - addiw a0, a0, 1 - lw s4, 0(s0) - mulw s2, a7, s4 - li a7, 1000 - addw a1, s1, s2 - sw a1, 0(a6) - bge a0, a7, label1003 - addi a6, a6, 4 - mv a1, t2 - mv s0, zero - mv s1, zero -.p2align 2 -label781: - mul s3, s0, a2 - lw s2, 0(a1) - addiw s0, s0, 16 - add a7, a3, s3 - sh2add s4, a0, a7 - add s6, a7, a2 - sh2add s7, a0, s6 - lw s3, 0(s4) - lw s5, 4(a1) - lw s9, 0(s7) - mulw s8, s2, s3 - add s7, a7, a4 - lw s2, 8(a1) - mulw s6, s5, s9 - sh2add s5, a0, s7 - addw s4, s6, s8 - add s6, a7, a5 - lw s8, 0(s5) - sh2add s5, a0, s6 - add s6, a7, t0 - mulw s7, s2, s8 - addw s3, s4, s7 - lw s4, 12(a1) - lw s8, 0(s5) - sh2add s5, a0, s6 - add s6, a7, t1 - mulw s7, s4, s8 - lw s4, 16(a1) - addw s2, s3, s7 - lw s7, 0(s5) - sh2add s5, a0, s6 - mulw s8, s4, s7 - lw s4, 20(a1) - addw s3, s2, s8 - lw s7, 0(s5) - li s8, 125 - li s5, 375 - mulw s6, s4, s7 - slli s7, s5, 6 - addw s2, s3, s6 - lw s5, 24(a1) - add s3, a7, s7 - sh2add s4, a0, s3 - lw s6, 0(s4) - li s4, 875 - mulw s7, s5, s6 - slli s6, s4, 5 - addw s3, s2, s7 - lw s4, 28(a1) - add s2, a7, s6 - sh2add s5, a0, s2 - lw s6, 0(s5) - mulw s7, s4, s6 - lw s4, 32(a1) - slli s6, s8, 8 - addw s2, s3, s7 - li s8, 1125 - add s3, a7, s6 - sh2add s5, a0, s3 - lw s6, 0(s5) - mulw s7, s4, s6 - lw s4, 36(a1) - slli s6, s8, 5 - addw s3, s2, s7 - li s8, 1375 - add s2, a7, s6 - sh2add s5, a0, s2 - lw s7, 0(s5) - li s5, 625 - mulw s6, s4, s7 - slli s7, s5, 6 - addw s2, s3, s6 - lw s5, 40(a1) - add s3, a7, s7 - sh2add s4, a0, s3 - lw s6, 0(s4) - mulw s7, s5, s6 - lw s5, 44(a1) - slli s6, s8, 5 - addw s3, s2, s7 - li s8, 875 - add s2, a7, s6 - sh2add s4, a0, s2 - lw s7, 0(s4) - li s4, 375 - mulw s6, s5, s7 - slli s7, s4, 7 - addw s2, s3, s6 - lw s4, 48(a1) - add s3, a7, s7 - sh2add s5, a0, s3 - lw s6, 0(s5) - li s5, 1625 - mulw s7, s4, s6 - slli s6, s5, 5 - addw s3, s2, s7 - lw s5, 52(a1) - add s4, a7, s6 - sh2add s2, a0, s4 - lw s7, 0(s2) - mulw s6, s5, s7 - slli s7, s8, 6 - addw s4, s3, s6 - li s8, 1875 - add s2, a7, s7 - lw s3, 56(a1) - sh2add s5, a0, s2 - lw s6, 0(s5) - mulw s7, s3, s6 - lw s3, 60(a1) - slli s6, s8, 5 - addw s2, s4, s7 - add s5, a7, s6 - sh2add s4, a0, s5 - lw s7, 0(s4) - mulw s6, s3, s7 - addw s5, s2, s6 - li s2, 992 - addw s1, s1, s5 - bge s0, s2, label786 - addi a1, a1, 64 - j label781 + lw s4, 48(a0) + mulw s2, s3, s5 + lw s3, 48(t6) + addw s0, s1, s2 + lw s2, 52(a0) + lw s5, 52(t6) + mulw s6, s4, s3 + lw s3, 56(a0) + addw s1, s0, s6 + mulw s4, s2, s5 + lw s5, 56(t6) + addw s0, s1, s4 + lw s2, 60(a0) + lw s4, 60(t6) + mulw s6, s3, s5 + addw s1, s0, s6 + mulw s3, s2, s4 + addw s0, s1, s3 + addw a7, a7, s0 + bge a6, a1, label1095 + addi a0, a0, 64 + j label1090 .p2align 2 -label1003: - addiw t6, t6, 1 - ble t4, t6, label790 - add t3, t3, a2 - mul a1, t6, a2 - mv a0, zero - mv s0, zero - mv s1, zero - mv a6, t3 - add t2, t5, a1 - mv a1, t2 - j label781 -label790: +label1266: + addiw t4, t4, 1 + ble t1, t4, label1099 + add a4, a4, a3 + mul a0, t4, a3 + mv a2, t2 + mv t5, zero + mv a6, zero + mv a7, zero + add t0, t3, a0 + mv a0, t0 + j label1090 +label1099: ld s0, 0(sp) ld s5, 8(sp) ld s1, 16(sp) ld s6, 24(sp) - ld s3, 32(sp) - ld s4, 40(sp) + ld s4, 32(sp) + ld s3, 40(sp) ld s2, 48(sp) - ld s7, 56(sp) - ld s9, 64(sp) - ld s8, 72(sp) - addi sp, sp, 80 + addi sp, sp, 56 ret .p2align 2 cmmc_parallel_body_2: mv t0, a0 mv a2, a1 addiw a4, a0, 3 -pcrel1192: +pcrel1409: auipc a5, %pcrel_hi(cmmc_parallel_body_payload_2) - ld a3, %pcrel_lo(pcrel1192)(a5) - addi a1, a5, %pcrel_lo(pcrel1192) + ld a3, %pcrel_lo(pcrel1409)(a5) + addi a1, a5, %pcrel_lo(pcrel1409) lw a0, 8(a1) - ble a2, a4, label1090 + ble a2, a4, label1307 addiw t1, t0, 15 addiw a4, a2, -3 addiw a5, a2, -18 - bge t1, a4, label1141 + bge t1, a4, label1358 sh2add a1, t0, a3 - j label1100 + j label1317 .p2align 2 -label1103: +label1320: addi a1, a1, 64 .p2align 2 -label1100: +label1317: sw a0, 0(a1) addiw t0, t0, 16 sw a0, 4(a1) @@ -1547,59 +1664,59 @@ label1100: sw a0, 52(a1) sw a0, 56(a1) sw a0, 60(a1) - bgt a5, t0, label1103 + bgt a5, t0, label1320 mv a1, t0 -label1104: - ble a4, a1, label1090 +label1321: + ble a4, a1, label1307 sh2add a5, a1, a3 -label1108: +label1325: sw a0, 0(a5) addiw a1, a1, 4 sw a0, 4(a5) sw a0, 8(a5) sw a0, 12(a5) - ble a4, a1, label1178 + ble a4, a1, label1395 addi a5, a5, 16 - j label1108 -label1178: + j label1325 +label1395: mv t0, a1 -label1090: - ble a2, t0, label1097 +label1307: + ble a2, t0, label1314 sh2add a1, t0, a3 - j label1093 -label1096: + j label1310 +label1313: addi a1, a1, 4 -label1093: +label1310: addiw t0, t0, 1 sw a0, 0(a1) - bgt a2, t0, label1096 -label1097: + bgt a2, t0, label1313 +label1314: ret -label1141: +label1358: mv a1, t0 mv t0, zero - j label1104 + j label1321 .p2align 2 cmmc_parallel_body_3: mv t0, a0 addiw a5, a0, 3 -pcrel1347: +pcrel1564: auipc a0, %pcrel_hi(cmmc_parallel_body_payload_3) - addi a2, a0, %pcrel_lo(pcrel1347) + addi a2, a0, %pcrel_lo(pcrel1564) ld a3, 8(a2) - ble a1, a5, label1235 + ble a1, a5, label1452 addiw a0, t0, 15 addiw a4, a1, -3 addiw a5, a1, -18 - bge a0, a4, label1242 + bge a0, a4, label1459 sh2add a0, t0, a3 mv t1, zero - j label1210 + j label1427 .p2align 2 -label1214: +label1431: addi a0, a0, 64 .p2align 2 -label1210: +label1427: lw t4, 0(a0) addiw t0, t0, 16 lw t5, 4(a0) @@ -1633,17 +1750,17 @@ label1210: lw t4, 60(a0) addw t2, t3, t6 addw t1, t2, t4 - bgt a5, t0, label1214 + bgt a5, t0, label1431 mv a5, t0 mv t2, t1 -label1195: - ble a4, a5, label1246 +label1412: + ble a4, a5, label1463 sh2add a0, a5, a3 mv t0, t2 - j label1204 -label1208: + j label1421 +label1425: addi a0, a0, 16 -label1204: +label1421: lw t1, 0(a0) addiw a5, a5, 4 lw t4, 4(a0) @@ -1653,39 +1770,39 @@ label1204: lw t3, 12(a0) addw t1, t2, t5 addw t0, t1, t3 - bgt a4, a5, label1208 + bgt a4, a5, label1425 mv a0, t0 mv a4, t0 mv t0, a5 -label1215: - ble a1, t0, label1324 +label1432: + ble a1, t0, label1541 sh2add a0, t0, a3 mv a3, a4 - j label1222 -label1226: + j label1439 +label1443: addi a0, a0, 4 -label1222: +label1439: lw a5, 0(a0) addiw t0, t0, 1 addw a3, a3, a5 - bgt a1, t0, label1226 -label1219: + bgt a1, t0, label1443 +label1436: amoadd.w.aqrl a1, a3, (a2) ret -label1246: +label1463: mv a0, t1 mv a4, t1 - j label1215 -label1242: + j label1432 +label1459: mv a5, t0 mv t2, zero mv t1, zero mv t0, zero - j label1195 -label1324: + j label1412 +label1541: mv a3, a0 - j label1219 -label1235: + j label1436 +label1452: mv a4, zero mv a0, zero - j label1215 + j label1432 diff --git a/tests/SysY2022/performance/matmul1.sy.ir b/tests/SysY2022/performance/matmul1.sy.ir index 4bc2f50e6..f140ebcc6 100644 --- a/tests/SysY2022/performance/matmul1.sy.ir +++ b/tests/SysY2022/performance/matmul1.sy.ir @@ -3,7 +3,7 @@ internal func @putint(i32) -> void { NoMemoryRead NoMemoryWrite }; internal func @starttime(i32) -> void { NoMemoryRead NoMemoryWrite }; internal func @stoptime(i32) -> void { NoMemoryRead NoMemoryWrite }; internal [1000 * [1000 * i32]]* @a, align 8; -internal [1000 * [1000 * i32]]* @b, align 8 { Flexible }; +internal [1000 * [1000 * i32]]* @b, align 8 { Flexible Transposed }; internal [1000 * [1000 * i32]]* @c, align 8 { Flexible }; func @main() -> i32 { NoRecurse Entry } { ^entry: @@ -550,743 +550,823 @@ func @main() -> i32 { NoRecurse Entry } { internal func @cmmcParallelFor(i32, i32, i8*) -> void { NoRecurse }; internal func @cmmc_parallel_body_0(i32 %0, i32 %1) -> void { NoRecurse ParallelBody AlignedParallelBody } { ^b: - [1000 * [1000 * i32]]* %2 = ptrcast [1000 * [1000 * i32]]* @b to [1000 * [1000 * i32]]*; - [1000 * [1000 * i32]]* %3 = ptrcast [1000 * [1000 * i32]]* @a to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %2 = ptrcast [1000 * [1000 * i32]]* @a to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %3 = ptrcast [1000 * [1000 * i32]]* @b to [1000 * [1000 * i32]]*; ubr ^b1; ^b1: - i32 %4 = phi [^b, i32 %0] [^while.body1, i32 %425]; - [1000 * i32]* %5 = getelementptr &([1000 * [1000 * i32]]* %2)[i64 0][i32 %4]; + i32 %4 = phi [^b, i32 %0] [^while.body1, i32 %528]; ubr ^while.body; ^while.body: - i32 %6 = phi [^b1, i32 0] [^while.body, i32 %263]; - [1000 * i32]* %7 = getelementptr &([1000 * [1000 * i32]]* %3)[i64 0][i32 %6]; - i32* %8 = getelementptr &([1000 * i32]* %7)[i64 0][i32 %4]; - i32 %9 = load i32* %8; - i32* %10 = getelementptr &([1000 * i32]* %5)[i64 0][i32 %6]; - store i32* %10 with i32 %9; - [1000 * i32]* %11 = getelementptr &([1000 * i32]* %7)[i64 1]; + i32 %5 = phi [^b1, i32 0] [^while.body, i32 %326]; + [1000 * i32]* %6 = getelementptr &([1000 * [1000 * i32]]* %2)[i64 0][i32 %5]; + i32* %7 = getelementptr &([1000 * i32]* %6)[i64 0][i32 %4]; + i32 %8 = load i32* %7; + [1000 * i32]* %9 = getelementptr &([1000 * [1000 * i32]]* %3)[i64 0][i32 %5]; + i32* %10 = getelementptr &([1000 * i32]* %9)[i64 0][i32 %4]; + store i32* %10 with i32 %8; + [1000 * i32]* %11 = getelementptr &([1000 * i32]* %6)[i64 1]; i32* %12 = getelementptr &([1000 * i32]* %11)[i64 0][i32 %4]; i32 %13 = load i32* %12; - i32* %14 = getelementptr &(i32* %10)[i64 1]; - store i32* %14 with i32 %13; - [1000 * i32]* %15 = getelementptr &([1000 * i32]* %7)[i64 2]; - i32* %16 = getelementptr &([1000 * i32]* %15)[i64 0][i32 %4]; - i32 %17 = load i32* %16; - i32* %18 = getelementptr &(i32* %10)[i64 2]; - store i32* %18 with i32 %17; - [1000 * i32]* %19 = getelementptr &([1000 * i32]* %7)[i64 3]; + [1000 * i32]* %14 = getelementptr &([1000 * i32]* %9)[i64 1]; + i32* %15 = getelementptr &([1000 * i32]* %14)[i64 0][i32 %4]; + store i32* %15 with i32 %13; + [1000 * i32]* %16 = getelementptr &([1000 * i32]* %6)[i64 2]; + i32* %17 = getelementptr &([1000 * i32]* %16)[i64 0][i32 %4]; + i32 %18 = load i32* %17; + [1000 * i32]* %19 = getelementptr &([1000 * i32]* %9)[i64 2]; i32* %20 = getelementptr &([1000 * i32]* %19)[i64 0][i32 %4]; - i32 %21 = load i32* %20; - i32* %22 = getelementptr &(i32* %10)[i64 3]; - store i32* %22 with i32 %21; - [1000 * i32]* %23 = getelementptr &([1000 * i32]* %7)[i64 4]; - i32* %24 = getelementptr &([1000 * i32]* %23)[i64 0][i32 %4]; - i32 %25 = load i32* %24; - i32* %26 = getelementptr &(i32* %10)[i64 4]; - store i32* %26 with i32 %25; - [1000 * i32]* %27 = getelementptr &([1000 * i32]* %7)[i64 5]; - i32* %28 = getelementptr &([1000 * i32]* %27)[i64 0][i32 %4]; - i32 %29 = load i32* %28; - i32* %30 = getelementptr &(i32* %10)[i64 5]; - store i32* %30 with i32 %29; - [1000 * i32]* %31 = getelementptr &([1000 * i32]* %7)[i64 6]; + store i32* %20 with i32 %18; + [1000 * i32]* %21 = getelementptr &([1000 * i32]* %6)[i64 3]; + i32* %22 = getelementptr &([1000 * i32]* %21)[i64 0][i32 %4]; + i32 %23 = load i32* %22; + [1000 * i32]* %24 = getelementptr &([1000 * i32]* %9)[i64 3]; + i32* %25 = getelementptr &([1000 * i32]* %24)[i64 0][i32 %4]; + store i32* %25 with i32 %23; + [1000 * i32]* %26 = getelementptr &([1000 * i32]* %6)[i64 4]; + i32* %27 = getelementptr &([1000 * i32]* %26)[i64 0][i32 %4]; + i32 %28 = load i32* %27; + [1000 * i32]* %29 = getelementptr &([1000 * i32]* %9)[i64 4]; + i32* %30 = getelementptr &([1000 * i32]* %29)[i64 0][i32 %4]; + store i32* %30 with i32 %28; + [1000 * i32]* %31 = getelementptr &([1000 * i32]* %6)[i64 5]; i32* %32 = getelementptr &([1000 * i32]* %31)[i64 0][i32 %4]; i32 %33 = load i32* %32; - i32* %34 = getelementptr &(i32* %10)[i64 6]; - store i32* %34 with i32 %33; - [1000 * i32]* %35 = getelementptr &([1000 * i32]* %7)[i64 7]; - i32* %36 = getelementptr &([1000 * i32]* %35)[i64 0][i32 %4]; - i32 %37 = load i32* %36; - i32* %38 = getelementptr &(i32* %10)[i64 7]; - store i32* %38 with i32 %37; - [1000 * i32]* %39 = getelementptr &([1000 * i32]* %7)[i64 8]; + [1000 * i32]* %34 = getelementptr &([1000 * i32]* %9)[i64 5]; + i32* %35 = getelementptr &([1000 * i32]* %34)[i64 0][i32 %4]; + store i32* %35 with i32 %33; + [1000 * i32]* %36 = getelementptr &([1000 * i32]* %6)[i64 6]; + i32* %37 = getelementptr &([1000 * i32]* %36)[i64 0][i32 %4]; + i32 %38 = load i32* %37; + [1000 * i32]* %39 = getelementptr &([1000 * i32]* %9)[i64 6]; i32* %40 = getelementptr &([1000 * i32]* %39)[i64 0][i32 %4]; - i32 %41 = load i32* %40; - i32* %42 = getelementptr &(i32* %10)[i64 8]; - store i32* %42 with i32 %41; - [1000 * i32]* %43 = getelementptr &([1000 * i32]* %7)[i64 9]; - i32* %44 = getelementptr &([1000 * i32]* %43)[i64 0][i32 %4]; - i32 %45 = load i32* %44; - i32* %46 = getelementptr &(i32* %10)[i64 9]; - store i32* %46 with i32 %45; - [1000 * i32]* %47 = getelementptr &([1000 * i32]* %7)[i64 10]; - i32* %48 = getelementptr &([1000 * i32]* %47)[i64 0][i32 %4]; - i32 %49 = load i32* %48; - i32* %50 = getelementptr &(i32* %10)[i64 10]; - store i32* %50 with i32 %49; - [1000 * i32]* %51 = getelementptr &([1000 * i32]* %7)[i64 11]; + store i32* %40 with i32 %38; + [1000 * i32]* %41 = getelementptr &([1000 * i32]* %6)[i64 7]; + i32* %42 = getelementptr &([1000 * i32]* %41)[i64 0][i32 %4]; + i32 %43 = load i32* %42; + [1000 * i32]* %44 = getelementptr &([1000 * i32]* %9)[i64 7]; + i32* %45 = getelementptr &([1000 * i32]* %44)[i64 0][i32 %4]; + store i32* %45 with i32 %43; + [1000 * i32]* %46 = getelementptr &([1000 * i32]* %6)[i64 8]; + i32* %47 = getelementptr &([1000 * i32]* %46)[i64 0][i32 %4]; + i32 %48 = load i32* %47; + [1000 * i32]* %49 = getelementptr &([1000 * i32]* %9)[i64 8]; + i32* %50 = getelementptr &([1000 * i32]* %49)[i64 0][i32 %4]; + store i32* %50 with i32 %48; + [1000 * i32]* %51 = getelementptr &([1000 * i32]* %6)[i64 9]; i32* %52 = getelementptr &([1000 * i32]* %51)[i64 0][i32 %4]; i32 %53 = load i32* %52; - i32* %54 = getelementptr &(i32* %10)[i64 11]; - store i32* %54 with i32 %53; - [1000 * i32]* %55 = getelementptr &([1000 * i32]* %7)[i64 12]; - i32* %56 = getelementptr &([1000 * i32]* %55)[i64 0][i32 %4]; - i32 %57 = load i32* %56; - i32* %58 = getelementptr &(i32* %10)[i64 12]; - store i32* %58 with i32 %57; - [1000 * i32]* %59 = getelementptr &([1000 * i32]* %7)[i64 13]; + [1000 * i32]* %54 = getelementptr &([1000 * i32]* %9)[i64 9]; + i32* %55 = getelementptr &([1000 * i32]* %54)[i64 0][i32 %4]; + store i32* %55 with i32 %53; + [1000 * i32]* %56 = getelementptr &([1000 * i32]* %6)[i64 10]; + i32* %57 = getelementptr &([1000 * i32]* %56)[i64 0][i32 %4]; + i32 %58 = load i32* %57; + [1000 * i32]* %59 = getelementptr &([1000 * i32]* %9)[i64 10]; i32* %60 = getelementptr &([1000 * i32]* %59)[i64 0][i32 %4]; - i32 %61 = load i32* %60; - i32* %62 = getelementptr &(i32* %10)[i64 13]; - store i32* %62 with i32 %61; - [1000 * i32]* %63 = getelementptr &([1000 * i32]* %7)[i64 14]; - i32* %64 = getelementptr &([1000 * i32]* %63)[i64 0][i32 %4]; - i32 %65 = load i32* %64; - i32* %66 = getelementptr &(i32* %10)[i64 14]; - store i32* %66 with i32 %65; - [1000 * i32]* %67 = getelementptr &([1000 * i32]* %7)[i64 15]; - i32* %68 = getelementptr &([1000 * i32]* %67)[i64 0][i32 %4]; - i32 %69 = load i32* %68; - i32* %70 = getelementptr &(i32* %10)[i64 15]; - store i32* %70 with i32 %69; - [1000 * i32]* %71 = getelementptr &([1000 * i32]* %7)[i64 16]; + store i32* %60 with i32 %58; + [1000 * i32]* %61 = getelementptr &([1000 * i32]* %6)[i64 11]; + i32* %62 = getelementptr &([1000 * i32]* %61)[i64 0][i32 %4]; + i32 %63 = load i32* %62; + [1000 * i32]* %64 = getelementptr &([1000 * i32]* %9)[i64 11]; + i32* %65 = getelementptr &([1000 * i32]* %64)[i64 0][i32 %4]; + store i32* %65 with i32 %63; + [1000 * i32]* %66 = getelementptr &([1000 * i32]* %6)[i64 12]; + i32* %67 = getelementptr &([1000 * i32]* %66)[i64 0][i32 %4]; + i32 %68 = load i32* %67; + [1000 * i32]* %69 = getelementptr &([1000 * i32]* %9)[i64 12]; + i32* %70 = getelementptr &([1000 * i32]* %69)[i64 0][i32 %4]; + store i32* %70 with i32 %68; + [1000 * i32]* %71 = getelementptr &([1000 * i32]* %6)[i64 13]; i32* %72 = getelementptr &([1000 * i32]* %71)[i64 0][i32 %4]; i32 %73 = load i32* %72; - i32* %74 = getelementptr &(i32* %10)[i64 16]; - store i32* %74 with i32 %73; - [1000 * i32]* %75 = getelementptr &([1000 * i32]* %7)[i64 17]; - i32* %76 = getelementptr &([1000 * i32]* %75)[i64 0][i32 %4]; - i32 %77 = load i32* %76; - i32* %78 = getelementptr &(i32* %10)[i64 17]; - store i32* %78 with i32 %77; - [1000 * i32]* %79 = getelementptr &([1000 * i32]* %7)[i64 18]; + [1000 * i32]* %74 = getelementptr &([1000 * i32]* %9)[i64 13]; + i32* %75 = getelementptr &([1000 * i32]* %74)[i64 0][i32 %4]; + store i32* %75 with i32 %73; + [1000 * i32]* %76 = getelementptr &([1000 * i32]* %6)[i64 14]; + i32* %77 = getelementptr &([1000 * i32]* %76)[i64 0][i32 %4]; + i32 %78 = load i32* %77; + [1000 * i32]* %79 = getelementptr &([1000 * i32]* %9)[i64 14]; i32* %80 = getelementptr &([1000 * i32]* %79)[i64 0][i32 %4]; - i32 %81 = load i32* %80; - i32* %82 = getelementptr &(i32* %10)[i64 18]; - store i32* %82 with i32 %81; - [1000 * i32]* %83 = getelementptr &([1000 * i32]* %7)[i64 19]; - i32* %84 = getelementptr &([1000 * i32]* %83)[i64 0][i32 %4]; - i32 %85 = load i32* %84; - i32* %86 = getelementptr &(i32* %10)[i64 19]; - store i32* %86 with i32 %85; - [1000 * i32]* %87 = getelementptr &([1000 * i32]* %7)[i64 20]; - i32* %88 = getelementptr &([1000 * i32]* %87)[i64 0][i32 %4]; - i32 %89 = load i32* %88; - i32* %90 = getelementptr &(i32* %10)[i64 20]; - store i32* %90 with i32 %89; - [1000 * i32]* %91 = getelementptr &([1000 * i32]* %7)[i64 21]; + store i32* %80 with i32 %78; + [1000 * i32]* %81 = getelementptr &([1000 * i32]* %6)[i64 15]; + i32* %82 = getelementptr &([1000 * i32]* %81)[i64 0][i32 %4]; + i32 %83 = load i32* %82; + [1000 * i32]* %84 = getelementptr &([1000 * i32]* %9)[i64 15]; + i32* %85 = getelementptr &([1000 * i32]* %84)[i64 0][i32 %4]; + store i32* %85 with i32 %83; + [1000 * i32]* %86 = getelementptr &([1000 * i32]* %6)[i64 16]; + i32* %87 = getelementptr &([1000 * i32]* %86)[i64 0][i32 %4]; + i32 %88 = load i32* %87; + [1000 * i32]* %89 = getelementptr &([1000 * i32]* %9)[i64 16]; + i32* %90 = getelementptr &([1000 * i32]* %89)[i64 0][i32 %4]; + store i32* %90 with i32 %88; + [1000 * i32]* %91 = getelementptr &([1000 * i32]* %6)[i64 17]; i32* %92 = getelementptr &([1000 * i32]* %91)[i64 0][i32 %4]; i32 %93 = load i32* %92; - i32* %94 = getelementptr &(i32* %10)[i64 21]; - store i32* %94 with i32 %93; - [1000 * i32]* %95 = getelementptr &([1000 * i32]* %7)[i64 22]; - i32* %96 = getelementptr &([1000 * i32]* %95)[i64 0][i32 %4]; - i32 %97 = load i32* %96; - i32* %98 = getelementptr &(i32* %10)[i64 22]; - store i32* %98 with i32 %97; - [1000 * i32]* %99 = getelementptr &([1000 * i32]* %7)[i64 23]; + [1000 * i32]* %94 = getelementptr &([1000 * i32]* %9)[i64 17]; + i32* %95 = getelementptr &([1000 * i32]* %94)[i64 0][i32 %4]; + store i32* %95 with i32 %93; + [1000 * i32]* %96 = getelementptr &([1000 * i32]* %6)[i64 18]; + i32* %97 = getelementptr &([1000 * i32]* %96)[i64 0][i32 %4]; + i32 %98 = load i32* %97; + [1000 * i32]* %99 = getelementptr &([1000 * i32]* %9)[i64 18]; i32* %100 = getelementptr &([1000 * i32]* %99)[i64 0][i32 %4]; - i32 %101 = load i32* %100; - i32* %102 = getelementptr &(i32* %10)[i64 23]; - store i32* %102 with i32 %101; - [1000 * i32]* %103 = getelementptr &([1000 * i32]* %7)[i64 24]; - i32* %104 = getelementptr &([1000 * i32]* %103)[i64 0][i32 %4]; - i32 %105 = load i32* %104; - i32* %106 = getelementptr &(i32* %10)[i64 24]; - store i32* %106 with i32 %105; - [1000 * i32]* %107 = getelementptr &([1000 * i32]* %7)[i64 25]; - i32* %108 = getelementptr &([1000 * i32]* %107)[i64 0][i32 %4]; - i32 %109 = load i32* %108; - i32* %110 = getelementptr &(i32* %10)[i64 25]; - store i32* %110 with i32 %109; - [1000 * i32]* %111 = getelementptr &([1000 * i32]* %7)[i64 26]; + store i32* %100 with i32 %98; + [1000 * i32]* %101 = getelementptr &([1000 * i32]* %6)[i64 19]; + i32* %102 = getelementptr &([1000 * i32]* %101)[i64 0][i32 %4]; + i32 %103 = load i32* %102; + [1000 * i32]* %104 = getelementptr &([1000 * i32]* %9)[i64 19]; + i32* %105 = getelementptr &([1000 * i32]* %104)[i64 0][i32 %4]; + store i32* %105 with i32 %103; + [1000 * i32]* %106 = getelementptr &([1000 * i32]* %6)[i64 20]; + i32* %107 = getelementptr &([1000 * i32]* %106)[i64 0][i32 %4]; + i32 %108 = load i32* %107; + [1000 * i32]* %109 = getelementptr &([1000 * i32]* %9)[i64 20]; + i32* %110 = getelementptr &([1000 * i32]* %109)[i64 0][i32 %4]; + store i32* %110 with i32 %108; + [1000 * i32]* %111 = getelementptr &([1000 * i32]* %6)[i64 21]; i32* %112 = getelementptr &([1000 * i32]* %111)[i64 0][i32 %4]; i32 %113 = load i32* %112; - i32* %114 = getelementptr &(i32* %10)[i64 26]; - store i32* %114 with i32 %113; - [1000 * i32]* %115 = getelementptr &([1000 * i32]* %7)[i64 27]; - i32* %116 = getelementptr &([1000 * i32]* %115)[i64 0][i32 %4]; - i32 %117 = load i32* %116; - i32* %118 = getelementptr &(i32* %10)[i64 27]; - store i32* %118 with i32 %117; - [1000 * i32]* %119 = getelementptr &([1000 * i32]* %7)[i64 28]; + [1000 * i32]* %114 = getelementptr &([1000 * i32]* %9)[i64 21]; + i32* %115 = getelementptr &([1000 * i32]* %114)[i64 0][i32 %4]; + store i32* %115 with i32 %113; + [1000 * i32]* %116 = getelementptr &([1000 * i32]* %6)[i64 22]; + i32* %117 = getelementptr &([1000 * i32]* %116)[i64 0][i32 %4]; + i32 %118 = load i32* %117; + [1000 * i32]* %119 = getelementptr &([1000 * i32]* %9)[i64 22]; i32* %120 = getelementptr &([1000 * i32]* %119)[i64 0][i32 %4]; - i32 %121 = load i32* %120; - i32* %122 = getelementptr &(i32* %10)[i64 28]; - store i32* %122 with i32 %121; - [1000 * i32]* %123 = getelementptr &([1000 * i32]* %7)[i64 29]; - i32* %124 = getelementptr &([1000 * i32]* %123)[i64 0][i32 %4]; - i32 %125 = load i32* %124; - i32* %126 = getelementptr &(i32* %10)[i64 29]; - store i32* %126 with i32 %125; - [1000 * i32]* %127 = getelementptr &([1000 * i32]* %7)[i64 30]; - i32* %128 = getelementptr &([1000 * i32]* %127)[i64 0][i32 %4]; - i32 %129 = load i32* %128; - i32* %130 = getelementptr &(i32* %10)[i64 30]; - store i32* %130 with i32 %129; - [1000 * i32]* %131 = getelementptr &([1000 * i32]* %7)[i64 31]; + store i32* %120 with i32 %118; + [1000 * i32]* %121 = getelementptr &([1000 * i32]* %6)[i64 23]; + i32* %122 = getelementptr &([1000 * i32]* %121)[i64 0][i32 %4]; + i32 %123 = load i32* %122; + [1000 * i32]* %124 = getelementptr &([1000 * i32]* %9)[i64 23]; + i32* %125 = getelementptr &([1000 * i32]* %124)[i64 0][i32 %4]; + store i32* %125 with i32 %123; + [1000 * i32]* %126 = getelementptr &([1000 * i32]* %6)[i64 24]; + i32* %127 = getelementptr &([1000 * i32]* %126)[i64 0][i32 %4]; + i32 %128 = load i32* %127; + [1000 * i32]* %129 = getelementptr &([1000 * i32]* %9)[i64 24]; + i32* %130 = getelementptr &([1000 * i32]* %129)[i64 0][i32 %4]; + store i32* %130 with i32 %128; + [1000 * i32]* %131 = getelementptr &([1000 * i32]* %6)[i64 25]; i32* %132 = getelementptr &([1000 * i32]* %131)[i64 0][i32 %4]; i32 %133 = load i32* %132; - i32* %134 = getelementptr &(i32* %10)[i64 31]; - store i32* %134 with i32 %133; - [1000 * i32]* %135 = getelementptr &([1000 * i32]* %7)[i64 32]; - i32* %136 = getelementptr &([1000 * i32]* %135)[i64 0][i32 %4]; - i32 %137 = load i32* %136; - i32* %138 = getelementptr &(i32* %10)[i64 32]; - store i32* %138 with i32 %137; - [1000 * i32]* %139 = getelementptr &([1000 * i32]* %7)[i64 33]; + [1000 * i32]* %134 = getelementptr &([1000 * i32]* %9)[i64 25]; + i32* %135 = getelementptr &([1000 * i32]* %134)[i64 0][i32 %4]; + store i32* %135 with i32 %133; + [1000 * i32]* %136 = getelementptr &([1000 * i32]* %6)[i64 26]; + i32* %137 = getelementptr &([1000 * i32]* %136)[i64 0][i32 %4]; + i32 %138 = load i32* %137; + [1000 * i32]* %139 = getelementptr &([1000 * i32]* %9)[i64 26]; i32* %140 = getelementptr &([1000 * i32]* %139)[i64 0][i32 %4]; - i32 %141 = load i32* %140; - i32* %142 = getelementptr &(i32* %10)[i64 33]; - store i32* %142 with i32 %141; - [1000 * i32]* %143 = getelementptr &([1000 * i32]* %7)[i64 34]; - i32* %144 = getelementptr &([1000 * i32]* %143)[i64 0][i32 %4]; - i32 %145 = load i32* %144; - i32* %146 = getelementptr &(i32* %10)[i64 34]; - store i32* %146 with i32 %145; - [1000 * i32]* %147 = getelementptr &([1000 * i32]* %7)[i64 35]; - i32* %148 = getelementptr &([1000 * i32]* %147)[i64 0][i32 %4]; - i32 %149 = load i32* %148; - i32* %150 = getelementptr &(i32* %10)[i64 35]; - store i32* %150 with i32 %149; - [1000 * i32]* %151 = getelementptr &([1000 * i32]* %7)[i64 36]; + store i32* %140 with i32 %138; + [1000 * i32]* %141 = getelementptr &([1000 * i32]* %6)[i64 27]; + i32* %142 = getelementptr &([1000 * i32]* %141)[i64 0][i32 %4]; + i32 %143 = load i32* %142; + [1000 * i32]* %144 = getelementptr &([1000 * i32]* %9)[i64 27]; + i32* %145 = getelementptr &([1000 * i32]* %144)[i64 0][i32 %4]; + store i32* %145 with i32 %143; + [1000 * i32]* %146 = getelementptr &([1000 * i32]* %6)[i64 28]; + i32* %147 = getelementptr &([1000 * i32]* %146)[i64 0][i32 %4]; + i32 %148 = load i32* %147; + [1000 * i32]* %149 = getelementptr &([1000 * i32]* %9)[i64 28]; + i32* %150 = getelementptr &([1000 * i32]* %149)[i64 0][i32 %4]; + store i32* %150 with i32 %148; + [1000 * i32]* %151 = getelementptr &([1000 * i32]* %6)[i64 29]; i32* %152 = getelementptr &([1000 * i32]* %151)[i64 0][i32 %4]; i32 %153 = load i32* %152; - i32* %154 = getelementptr &(i32* %10)[i64 36]; - store i32* %154 with i32 %153; - [1000 * i32]* %155 = getelementptr &([1000 * i32]* %7)[i64 37]; - i32* %156 = getelementptr &([1000 * i32]* %155)[i64 0][i32 %4]; - i32 %157 = load i32* %156; - i32* %158 = getelementptr &(i32* %10)[i64 37]; - store i32* %158 with i32 %157; - [1000 * i32]* %159 = getelementptr &([1000 * i32]* %7)[i64 38]; + [1000 * i32]* %154 = getelementptr &([1000 * i32]* %9)[i64 29]; + i32* %155 = getelementptr &([1000 * i32]* %154)[i64 0][i32 %4]; + store i32* %155 with i32 %153; + [1000 * i32]* %156 = getelementptr &([1000 * i32]* %6)[i64 30]; + i32* %157 = getelementptr &([1000 * i32]* %156)[i64 0][i32 %4]; + i32 %158 = load i32* %157; + [1000 * i32]* %159 = getelementptr &([1000 * i32]* %9)[i64 30]; i32* %160 = getelementptr &([1000 * i32]* %159)[i64 0][i32 %4]; - i32 %161 = load i32* %160; - i32* %162 = getelementptr &(i32* %10)[i64 38]; - store i32* %162 with i32 %161; - [1000 * i32]* %163 = getelementptr &([1000 * i32]* %7)[i64 39]; - i32* %164 = getelementptr &([1000 * i32]* %163)[i64 0][i32 %4]; - i32 %165 = load i32* %164; - i32* %166 = getelementptr &(i32* %10)[i64 39]; - store i32* %166 with i32 %165; - [1000 * i32]* %167 = getelementptr &([1000 * i32]* %7)[i64 40]; - i32* %168 = getelementptr &([1000 * i32]* %167)[i64 0][i32 %4]; - i32 %169 = load i32* %168; - i32* %170 = getelementptr &(i32* %10)[i64 40]; - store i32* %170 with i32 %169; - [1000 * i32]* %171 = getelementptr &([1000 * i32]* %7)[i64 41]; + store i32* %160 with i32 %158; + [1000 * i32]* %161 = getelementptr &([1000 * i32]* %6)[i64 31]; + i32* %162 = getelementptr &([1000 * i32]* %161)[i64 0][i32 %4]; + i32 %163 = load i32* %162; + [1000 * i32]* %164 = getelementptr &([1000 * i32]* %9)[i64 31]; + i32* %165 = getelementptr &([1000 * i32]* %164)[i64 0][i32 %4]; + store i32* %165 with i32 %163; + [1000 * i32]* %166 = getelementptr &([1000 * i32]* %6)[i64 32]; + i32* %167 = getelementptr &([1000 * i32]* %166)[i64 0][i32 %4]; + i32 %168 = load i32* %167; + [1000 * i32]* %169 = getelementptr &([1000 * i32]* %9)[i64 32]; + i32* %170 = getelementptr &([1000 * i32]* %169)[i64 0][i32 %4]; + store i32* %170 with i32 %168; + [1000 * i32]* %171 = getelementptr &([1000 * i32]* %6)[i64 33]; i32* %172 = getelementptr &([1000 * i32]* %171)[i64 0][i32 %4]; i32 %173 = load i32* %172; - i32* %174 = getelementptr &(i32* %10)[i64 41]; - store i32* %174 with i32 %173; - [1000 * i32]* %175 = getelementptr &([1000 * i32]* %7)[i64 42]; - i32* %176 = getelementptr &([1000 * i32]* %175)[i64 0][i32 %4]; - i32 %177 = load i32* %176; - i32* %178 = getelementptr &(i32* %10)[i64 42]; - store i32* %178 with i32 %177; - [1000 * i32]* %179 = getelementptr &([1000 * i32]* %7)[i64 43]; + [1000 * i32]* %174 = getelementptr &([1000 * i32]* %9)[i64 33]; + i32* %175 = getelementptr &([1000 * i32]* %174)[i64 0][i32 %4]; + store i32* %175 with i32 %173; + [1000 * i32]* %176 = getelementptr &([1000 * i32]* %6)[i64 34]; + i32* %177 = getelementptr &([1000 * i32]* %176)[i64 0][i32 %4]; + i32 %178 = load i32* %177; + [1000 * i32]* %179 = getelementptr &([1000 * i32]* %9)[i64 34]; i32* %180 = getelementptr &([1000 * i32]* %179)[i64 0][i32 %4]; - i32 %181 = load i32* %180; - i32* %182 = getelementptr &(i32* %10)[i64 43]; - store i32* %182 with i32 %181; - [1000 * i32]* %183 = getelementptr &([1000 * i32]* %7)[i64 44]; - i32* %184 = getelementptr &([1000 * i32]* %183)[i64 0][i32 %4]; - i32 %185 = load i32* %184; - i32* %186 = getelementptr &(i32* %10)[i64 44]; - store i32* %186 with i32 %185; - [1000 * i32]* %187 = getelementptr &([1000 * i32]* %7)[i64 45]; - i32* %188 = getelementptr &([1000 * i32]* %187)[i64 0][i32 %4]; - i32 %189 = load i32* %188; - i32* %190 = getelementptr &(i32* %10)[i64 45]; - store i32* %190 with i32 %189; - [1000 * i32]* %191 = getelementptr &([1000 * i32]* %7)[i64 46]; + store i32* %180 with i32 %178; + [1000 * i32]* %181 = getelementptr &([1000 * i32]* %6)[i64 35]; + i32* %182 = getelementptr &([1000 * i32]* %181)[i64 0][i32 %4]; + i32 %183 = load i32* %182; + [1000 * i32]* %184 = getelementptr &([1000 * i32]* %9)[i64 35]; + i32* %185 = getelementptr &([1000 * i32]* %184)[i64 0][i32 %4]; + store i32* %185 with i32 %183; + [1000 * i32]* %186 = getelementptr &([1000 * i32]* %6)[i64 36]; + i32* %187 = getelementptr &([1000 * i32]* %186)[i64 0][i32 %4]; + i32 %188 = load i32* %187; + [1000 * i32]* %189 = getelementptr &([1000 * i32]* %9)[i64 36]; + i32* %190 = getelementptr &([1000 * i32]* %189)[i64 0][i32 %4]; + store i32* %190 with i32 %188; + [1000 * i32]* %191 = getelementptr &([1000 * i32]* %6)[i64 37]; i32* %192 = getelementptr &([1000 * i32]* %191)[i64 0][i32 %4]; i32 %193 = load i32* %192; - i32* %194 = getelementptr &(i32* %10)[i64 46]; - store i32* %194 with i32 %193; - [1000 * i32]* %195 = getelementptr &([1000 * i32]* %7)[i64 47]; - i32* %196 = getelementptr &([1000 * i32]* %195)[i64 0][i32 %4]; - i32 %197 = load i32* %196; - i32* %198 = getelementptr &(i32* %10)[i64 47]; - store i32* %198 with i32 %197; - [1000 * i32]* %199 = getelementptr &([1000 * i32]* %7)[i64 48]; + [1000 * i32]* %194 = getelementptr &([1000 * i32]* %9)[i64 37]; + i32* %195 = getelementptr &([1000 * i32]* %194)[i64 0][i32 %4]; + store i32* %195 with i32 %193; + [1000 * i32]* %196 = getelementptr &([1000 * i32]* %6)[i64 38]; + i32* %197 = getelementptr &([1000 * i32]* %196)[i64 0][i32 %4]; + i32 %198 = load i32* %197; + [1000 * i32]* %199 = getelementptr &([1000 * i32]* %9)[i64 38]; i32* %200 = getelementptr &([1000 * i32]* %199)[i64 0][i32 %4]; - i32 %201 = load i32* %200; - i32* %202 = getelementptr &(i32* %10)[i64 48]; - store i32* %202 with i32 %201; - [1000 * i32]* %203 = getelementptr &([1000 * i32]* %7)[i64 49]; - i32* %204 = getelementptr &([1000 * i32]* %203)[i64 0][i32 %4]; - i32 %205 = load i32* %204; - i32* %206 = getelementptr &(i32* %10)[i64 49]; - store i32* %206 with i32 %205; - [1000 * i32]* %207 = getelementptr &([1000 * i32]* %7)[i64 50]; - i32* %208 = getelementptr &([1000 * i32]* %207)[i64 0][i32 %4]; - i32 %209 = load i32* %208; - i32* %210 = getelementptr &(i32* %10)[i64 50]; - store i32* %210 with i32 %209; - [1000 * i32]* %211 = getelementptr &([1000 * i32]* %7)[i64 51]; + store i32* %200 with i32 %198; + [1000 * i32]* %201 = getelementptr &([1000 * i32]* %6)[i64 39]; + i32* %202 = getelementptr &([1000 * i32]* %201)[i64 0][i32 %4]; + i32 %203 = load i32* %202; + [1000 * i32]* %204 = getelementptr &([1000 * i32]* %9)[i64 39]; + i32* %205 = getelementptr &([1000 * i32]* %204)[i64 0][i32 %4]; + store i32* %205 with i32 %203; + [1000 * i32]* %206 = getelementptr &([1000 * i32]* %6)[i64 40]; + i32* %207 = getelementptr &([1000 * i32]* %206)[i64 0][i32 %4]; + i32 %208 = load i32* %207; + [1000 * i32]* %209 = getelementptr &([1000 * i32]* %9)[i64 40]; + i32* %210 = getelementptr &([1000 * i32]* %209)[i64 0][i32 %4]; + store i32* %210 with i32 %208; + [1000 * i32]* %211 = getelementptr &([1000 * i32]* %6)[i64 41]; i32* %212 = getelementptr &([1000 * i32]* %211)[i64 0][i32 %4]; i32 %213 = load i32* %212; - i32* %214 = getelementptr &(i32* %10)[i64 51]; - store i32* %214 with i32 %213; - [1000 * i32]* %215 = getelementptr &([1000 * i32]* %7)[i64 52]; - i32* %216 = getelementptr &([1000 * i32]* %215)[i64 0][i32 %4]; - i32 %217 = load i32* %216; - i32* %218 = getelementptr &(i32* %10)[i64 52]; - store i32* %218 with i32 %217; - [1000 * i32]* %219 = getelementptr &([1000 * i32]* %7)[i64 53]; + [1000 * i32]* %214 = getelementptr &([1000 * i32]* %9)[i64 41]; + i32* %215 = getelementptr &([1000 * i32]* %214)[i64 0][i32 %4]; + store i32* %215 with i32 %213; + [1000 * i32]* %216 = getelementptr &([1000 * i32]* %6)[i64 42]; + i32* %217 = getelementptr &([1000 * i32]* %216)[i64 0][i32 %4]; + i32 %218 = load i32* %217; + [1000 * i32]* %219 = getelementptr &([1000 * i32]* %9)[i64 42]; i32* %220 = getelementptr &([1000 * i32]* %219)[i64 0][i32 %4]; - i32 %221 = load i32* %220; - i32* %222 = getelementptr &(i32* %10)[i64 53]; - store i32* %222 with i32 %221; - [1000 * i32]* %223 = getelementptr &([1000 * i32]* %7)[i64 54]; - i32* %224 = getelementptr &([1000 * i32]* %223)[i64 0][i32 %4]; - i32 %225 = load i32* %224; - i32* %226 = getelementptr &(i32* %10)[i64 54]; - store i32* %226 with i32 %225; - [1000 * i32]* %227 = getelementptr &([1000 * i32]* %7)[i64 55]; - i32* %228 = getelementptr &([1000 * i32]* %227)[i64 0][i32 %4]; - i32 %229 = load i32* %228; - i32* %230 = getelementptr &(i32* %10)[i64 55]; - store i32* %230 with i32 %229; - [1000 * i32]* %231 = getelementptr &([1000 * i32]* %7)[i64 56]; + store i32* %220 with i32 %218; + [1000 * i32]* %221 = getelementptr &([1000 * i32]* %6)[i64 43]; + i32* %222 = getelementptr &([1000 * i32]* %221)[i64 0][i32 %4]; + i32 %223 = load i32* %222; + [1000 * i32]* %224 = getelementptr &([1000 * i32]* %9)[i64 43]; + i32* %225 = getelementptr &([1000 * i32]* %224)[i64 0][i32 %4]; + store i32* %225 with i32 %223; + [1000 * i32]* %226 = getelementptr &([1000 * i32]* %6)[i64 44]; + i32* %227 = getelementptr &([1000 * i32]* %226)[i64 0][i32 %4]; + i32 %228 = load i32* %227; + [1000 * i32]* %229 = getelementptr &([1000 * i32]* %9)[i64 44]; + i32* %230 = getelementptr &([1000 * i32]* %229)[i64 0][i32 %4]; + store i32* %230 with i32 %228; + [1000 * i32]* %231 = getelementptr &([1000 * i32]* %6)[i64 45]; i32* %232 = getelementptr &([1000 * i32]* %231)[i64 0][i32 %4]; i32 %233 = load i32* %232; - i32* %234 = getelementptr &(i32* %10)[i64 56]; - store i32* %234 with i32 %233; - [1000 * i32]* %235 = getelementptr &([1000 * i32]* %7)[i64 57]; - i32* %236 = getelementptr &([1000 * i32]* %235)[i64 0][i32 %4]; - i32 %237 = load i32* %236; - i32* %238 = getelementptr &(i32* %10)[i64 57]; - store i32* %238 with i32 %237; - [1000 * i32]* %239 = getelementptr &([1000 * i32]* %7)[i64 58]; + [1000 * i32]* %234 = getelementptr &([1000 * i32]* %9)[i64 45]; + i32* %235 = getelementptr &([1000 * i32]* %234)[i64 0][i32 %4]; + store i32* %235 with i32 %233; + [1000 * i32]* %236 = getelementptr &([1000 * i32]* %6)[i64 46]; + i32* %237 = getelementptr &([1000 * i32]* %236)[i64 0][i32 %4]; + i32 %238 = load i32* %237; + [1000 * i32]* %239 = getelementptr &([1000 * i32]* %9)[i64 46]; i32* %240 = getelementptr &([1000 * i32]* %239)[i64 0][i32 %4]; - i32 %241 = load i32* %240; - i32* %242 = getelementptr &(i32* %10)[i64 58]; - store i32* %242 with i32 %241; - [1000 * i32]* %243 = getelementptr &([1000 * i32]* %7)[i64 59]; - i32* %244 = getelementptr &([1000 * i32]* %243)[i64 0][i32 %4]; - i32 %245 = load i32* %244; - i32* %246 = getelementptr &(i32* %10)[i64 59]; - store i32* %246 with i32 %245; - [1000 * i32]* %247 = getelementptr &([1000 * i32]* %7)[i64 60]; - i32* %248 = getelementptr &([1000 * i32]* %247)[i64 0][i32 %4]; - i32 %249 = load i32* %248; - i32* %250 = getelementptr &(i32* %10)[i64 60]; - store i32* %250 with i32 %249; - [1000 * i32]* %251 = getelementptr &([1000 * i32]* %7)[i64 61]; + store i32* %240 with i32 %238; + [1000 * i32]* %241 = getelementptr &([1000 * i32]* %6)[i64 47]; + i32* %242 = getelementptr &([1000 * i32]* %241)[i64 0][i32 %4]; + i32 %243 = load i32* %242; + [1000 * i32]* %244 = getelementptr &([1000 * i32]* %9)[i64 47]; + i32* %245 = getelementptr &([1000 * i32]* %244)[i64 0][i32 %4]; + store i32* %245 with i32 %243; + [1000 * i32]* %246 = getelementptr &([1000 * i32]* %6)[i64 48]; + i32* %247 = getelementptr &([1000 * i32]* %246)[i64 0][i32 %4]; + i32 %248 = load i32* %247; + [1000 * i32]* %249 = getelementptr &([1000 * i32]* %9)[i64 48]; + i32* %250 = getelementptr &([1000 * i32]* %249)[i64 0][i32 %4]; + store i32* %250 with i32 %248; + [1000 * i32]* %251 = getelementptr &([1000 * i32]* %6)[i64 49]; i32* %252 = getelementptr &([1000 * i32]* %251)[i64 0][i32 %4]; i32 %253 = load i32* %252; - i32* %254 = getelementptr &(i32* %10)[i64 61]; - store i32* %254 with i32 %253; - [1000 * i32]* %255 = getelementptr &([1000 * i32]* %7)[i64 62]; - i32* %256 = getelementptr &([1000 * i32]* %255)[i64 0][i32 %4]; - i32 %257 = load i32* %256; - i32* %258 = getelementptr &(i32* %10)[i64 62]; - store i32* %258 with i32 %257; - [1000 * i32]* %259 = getelementptr &([1000 * i32]* %7)[i64 63]; + [1000 * i32]* %254 = getelementptr &([1000 * i32]* %9)[i64 49]; + i32* %255 = getelementptr &([1000 * i32]* %254)[i64 0][i32 %4]; + store i32* %255 with i32 %253; + [1000 * i32]* %256 = getelementptr &([1000 * i32]* %6)[i64 50]; + i32* %257 = getelementptr &([1000 * i32]* %256)[i64 0][i32 %4]; + i32 %258 = load i32* %257; + [1000 * i32]* %259 = getelementptr &([1000 * i32]* %9)[i64 50]; i32* %260 = getelementptr &([1000 * i32]* %259)[i64 0][i32 %4]; - i32 %261 = load i32* %260; - i32* %262 = getelementptr &(i32* %10)[i64 63]; - store i32* %262 with i32 %261; - i32 %263 = add i32 %6, i32 64; - i1 %264 = icmp slt i32 %263, i32 960; - cbr i1 %264(prob = 0.933333), ^while.body, ^while.body1; - ^while.body1: - [1000 * i32]* %265 = getelementptr &([1000 * i32]* %7)[i64 64]; - i32* %266 = getelementptr &([1000 * i32]* %265)[i64 0][i32 %4]; - i32 %267 = load i32* %266; - i32* %268 = getelementptr &(i32* %10)[i64 64]; - store i32* %268 with i32 %267; - [1000 * i32]* %269 = getelementptr &([1000 * i32]* %7)[i64 65]; + store i32* %260 with i32 %258; + [1000 * i32]* %261 = getelementptr &([1000 * i32]* %6)[i64 51]; + i32* %262 = getelementptr &([1000 * i32]* %261)[i64 0][i32 %4]; + i32 %263 = load i32* %262; + [1000 * i32]* %264 = getelementptr &([1000 * i32]* %9)[i64 51]; + i32* %265 = getelementptr &([1000 * i32]* %264)[i64 0][i32 %4]; + store i32* %265 with i32 %263; + [1000 * i32]* %266 = getelementptr &([1000 * i32]* %6)[i64 52]; + i32* %267 = getelementptr &([1000 * i32]* %266)[i64 0][i32 %4]; + i32 %268 = load i32* %267; + [1000 * i32]* %269 = getelementptr &([1000 * i32]* %9)[i64 52]; i32* %270 = getelementptr &([1000 * i32]* %269)[i64 0][i32 %4]; - i32 %271 = load i32* %270; - i32* %272 = getelementptr &(i32* %10)[i64 65]; - store i32* %272 with i32 %271; - [1000 * i32]* %273 = getelementptr &([1000 * i32]* %7)[i64 66]; - i32* %274 = getelementptr &([1000 * i32]* %273)[i64 0][i32 %4]; - i32 %275 = load i32* %274; - i32* %276 = getelementptr &(i32* %10)[i64 66]; - store i32* %276 with i32 %275; - [1000 * i32]* %277 = getelementptr &([1000 * i32]* %7)[i64 67]; - i32* %278 = getelementptr &([1000 * i32]* %277)[i64 0][i32 %4]; - i32 %279 = load i32* %278; - i32* %280 = getelementptr &(i32* %10)[i64 67]; - store i32* %280 with i32 %279; - [1000 * i32]* %281 = getelementptr &([1000 * i32]* %7)[i64 68]; + store i32* %270 with i32 %268; + [1000 * i32]* %271 = getelementptr &([1000 * i32]* %6)[i64 53]; + i32* %272 = getelementptr &([1000 * i32]* %271)[i64 0][i32 %4]; + i32 %273 = load i32* %272; + [1000 * i32]* %274 = getelementptr &([1000 * i32]* %9)[i64 53]; + i32* %275 = getelementptr &([1000 * i32]* %274)[i64 0][i32 %4]; + store i32* %275 with i32 %273; + [1000 * i32]* %276 = getelementptr &([1000 * i32]* %6)[i64 54]; + i32* %277 = getelementptr &([1000 * i32]* %276)[i64 0][i32 %4]; + i32 %278 = load i32* %277; + [1000 * i32]* %279 = getelementptr &([1000 * i32]* %9)[i64 54]; + i32* %280 = getelementptr &([1000 * i32]* %279)[i64 0][i32 %4]; + store i32* %280 with i32 %278; + [1000 * i32]* %281 = getelementptr &([1000 * i32]* %6)[i64 55]; i32* %282 = getelementptr &([1000 * i32]* %281)[i64 0][i32 %4]; i32 %283 = load i32* %282; - i32* %284 = getelementptr &(i32* %10)[i64 68]; - store i32* %284 with i32 %283; - [1000 * i32]* %285 = getelementptr &([1000 * i32]* %7)[i64 69]; - i32* %286 = getelementptr &([1000 * i32]* %285)[i64 0][i32 %4]; - i32 %287 = load i32* %286; - i32* %288 = getelementptr &(i32* %10)[i64 69]; - store i32* %288 with i32 %287; - [1000 * i32]* %289 = getelementptr &([1000 * i32]* %7)[i64 70]; + [1000 * i32]* %284 = getelementptr &([1000 * i32]* %9)[i64 55]; + i32* %285 = getelementptr &([1000 * i32]* %284)[i64 0][i32 %4]; + store i32* %285 with i32 %283; + [1000 * i32]* %286 = getelementptr &([1000 * i32]* %6)[i64 56]; + i32* %287 = getelementptr &([1000 * i32]* %286)[i64 0][i32 %4]; + i32 %288 = load i32* %287; + [1000 * i32]* %289 = getelementptr &([1000 * i32]* %9)[i64 56]; i32* %290 = getelementptr &([1000 * i32]* %289)[i64 0][i32 %4]; - i32 %291 = load i32* %290; - i32* %292 = getelementptr &(i32* %10)[i64 70]; - store i32* %292 with i32 %291; - [1000 * i32]* %293 = getelementptr &([1000 * i32]* %7)[i64 71]; - i32* %294 = getelementptr &([1000 * i32]* %293)[i64 0][i32 %4]; - i32 %295 = load i32* %294; - i32* %296 = getelementptr &(i32* %10)[i64 71]; - store i32* %296 with i32 %295; - [1000 * i32]* %297 = getelementptr &([1000 * i32]* %7)[i64 72]; - i32* %298 = getelementptr &([1000 * i32]* %297)[i64 0][i32 %4]; - i32 %299 = load i32* %298; - i32* %300 = getelementptr &(i32* %10)[i64 72]; - store i32* %300 with i32 %299; - [1000 * i32]* %301 = getelementptr &([1000 * i32]* %7)[i64 73]; + store i32* %290 with i32 %288; + [1000 * i32]* %291 = getelementptr &([1000 * i32]* %6)[i64 57]; + i32* %292 = getelementptr &([1000 * i32]* %291)[i64 0][i32 %4]; + i32 %293 = load i32* %292; + [1000 * i32]* %294 = getelementptr &([1000 * i32]* %9)[i64 57]; + i32* %295 = getelementptr &([1000 * i32]* %294)[i64 0][i32 %4]; + store i32* %295 with i32 %293; + [1000 * i32]* %296 = getelementptr &([1000 * i32]* %6)[i64 58]; + i32* %297 = getelementptr &([1000 * i32]* %296)[i64 0][i32 %4]; + i32 %298 = load i32* %297; + [1000 * i32]* %299 = getelementptr &([1000 * i32]* %9)[i64 58]; + i32* %300 = getelementptr &([1000 * i32]* %299)[i64 0][i32 %4]; + store i32* %300 with i32 %298; + [1000 * i32]* %301 = getelementptr &([1000 * i32]* %6)[i64 59]; i32* %302 = getelementptr &([1000 * i32]* %301)[i64 0][i32 %4]; i32 %303 = load i32* %302; - i32* %304 = getelementptr &(i32* %10)[i64 73]; - store i32* %304 with i32 %303; - [1000 * i32]* %305 = getelementptr &([1000 * i32]* %7)[i64 74]; - i32* %306 = getelementptr &([1000 * i32]* %305)[i64 0][i32 %4]; - i32 %307 = load i32* %306; - i32* %308 = getelementptr &(i32* %10)[i64 74]; - store i32* %308 with i32 %307; - [1000 * i32]* %309 = getelementptr &([1000 * i32]* %7)[i64 75]; + [1000 * i32]* %304 = getelementptr &([1000 * i32]* %9)[i64 59]; + i32* %305 = getelementptr &([1000 * i32]* %304)[i64 0][i32 %4]; + store i32* %305 with i32 %303; + [1000 * i32]* %306 = getelementptr &([1000 * i32]* %6)[i64 60]; + i32* %307 = getelementptr &([1000 * i32]* %306)[i64 0][i32 %4]; + i32 %308 = load i32* %307; + [1000 * i32]* %309 = getelementptr &([1000 * i32]* %9)[i64 60]; i32* %310 = getelementptr &([1000 * i32]* %309)[i64 0][i32 %4]; - i32 %311 = load i32* %310; - i32* %312 = getelementptr &(i32* %10)[i64 75]; - store i32* %312 with i32 %311; - [1000 * i32]* %313 = getelementptr &([1000 * i32]* %7)[i64 76]; - i32* %314 = getelementptr &([1000 * i32]* %313)[i64 0][i32 %4]; - i32 %315 = load i32* %314; - i32* %316 = getelementptr &(i32* %10)[i64 76]; - store i32* %316 with i32 %315; - [1000 * i32]* %317 = getelementptr &([1000 * i32]* %7)[i64 77]; - i32* %318 = getelementptr &([1000 * i32]* %317)[i64 0][i32 %4]; - i32 %319 = load i32* %318; - i32* %320 = getelementptr &(i32* %10)[i64 77]; - store i32* %320 with i32 %319; - [1000 * i32]* %321 = getelementptr &([1000 * i32]* %7)[i64 78]; + store i32* %310 with i32 %308; + [1000 * i32]* %311 = getelementptr &([1000 * i32]* %6)[i64 61]; + i32* %312 = getelementptr &([1000 * i32]* %311)[i64 0][i32 %4]; + i32 %313 = load i32* %312; + [1000 * i32]* %314 = getelementptr &([1000 * i32]* %9)[i64 61]; + i32* %315 = getelementptr &([1000 * i32]* %314)[i64 0][i32 %4]; + store i32* %315 with i32 %313; + [1000 * i32]* %316 = getelementptr &([1000 * i32]* %6)[i64 62]; + i32* %317 = getelementptr &([1000 * i32]* %316)[i64 0][i32 %4]; + i32 %318 = load i32* %317; + [1000 * i32]* %319 = getelementptr &([1000 * i32]* %9)[i64 62]; + i32* %320 = getelementptr &([1000 * i32]* %319)[i64 0][i32 %4]; + store i32* %320 with i32 %318; + [1000 * i32]* %321 = getelementptr &([1000 * i32]* %6)[i64 63]; i32* %322 = getelementptr &([1000 * i32]* %321)[i64 0][i32 %4]; i32 %323 = load i32* %322; - i32* %324 = getelementptr &(i32* %10)[i64 78]; - store i32* %324 with i32 %323; - [1000 * i32]* %325 = getelementptr &([1000 * i32]* %7)[i64 79]; - i32* %326 = getelementptr &([1000 * i32]* %325)[i64 0][i32 %4]; - i32 %327 = load i32* %326; - i32* %328 = getelementptr &(i32* %10)[i64 79]; - store i32* %328 with i32 %327; - [1000 * i32]* %329 = getelementptr &([1000 * i32]* %7)[i64 80]; - i32* %330 = getelementptr &([1000 * i32]* %329)[i64 0][i32 %4]; - i32 %331 = load i32* %330; - i32* %332 = getelementptr &(i32* %10)[i64 80]; - store i32* %332 with i32 %331; - [1000 * i32]* %333 = getelementptr &([1000 * i32]* %7)[i64 81]; + [1000 * i32]* %324 = getelementptr &([1000 * i32]* %9)[i64 63]; + i32* %325 = getelementptr &([1000 * i32]* %324)[i64 0][i32 %4]; + store i32* %325 with i32 %323; + i32 %326 = add i32 %5, i32 64; + i1 %327 = icmp slt i32 %326, i32 960; + cbr i1 %327(prob = 0.933333), ^while.body, ^while.body1; + ^while.body1: + [1000 * i32]* %328 = getelementptr &([1000 * i32]* %6)[i64 64]; + i32* %329 = getelementptr &([1000 * i32]* %328)[i64 0][i32 %4]; + i32 %330 = load i32* %329; + [1000 * i32]* %331 = getelementptr &([1000 * i32]* %9)[i64 64]; + i32* %332 = getelementptr &([1000 * i32]* %331)[i64 0][i32 %4]; + store i32* %332 with i32 %330; + [1000 * i32]* %333 = getelementptr &([1000 * i32]* %6)[i64 65]; i32* %334 = getelementptr &([1000 * i32]* %333)[i64 0][i32 %4]; i32 %335 = load i32* %334; - i32* %336 = getelementptr &(i32* %10)[i64 81]; - store i32* %336 with i32 %335; - [1000 * i32]* %337 = getelementptr &([1000 * i32]* %7)[i64 82]; - i32* %338 = getelementptr &([1000 * i32]* %337)[i64 0][i32 %4]; - i32 %339 = load i32* %338; - i32* %340 = getelementptr &(i32* %10)[i64 82]; - store i32* %340 with i32 %339; - [1000 * i32]* %341 = getelementptr &([1000 * i32]* %7)[i64 83]; + [1000 * i32]* %336 = getelementptr &([1000 * i32]* %9)[i64 65]; + i32* %337 = getelementptr &([1000 * i32]* %336)[i64 0][i32 %4]; + store i32* %337 with i32 %335; + [1000 * i32]* %338 = getelementptr &([1000 * i32]* %6)[i64 66]; + i32* %339 = getelementptr &([1000 * i32]* %338)[i64 0][i32 %4]; + i32 %340 = load i32* %339; + [1000 * i32]* %341 = getelementptr &([1000 * i32]* %9)[i64 66]; i32* %342 = getelementptr &([1000 * i32]* %341)[i64 0][i32 %4]; - i32 %343 = load i32* %342; - i32* %344 = getelementptr &(i32* %10)[i64 83]; - store i32* %344 with i32 %343; - [1000 * i32]* %345 = getelementptr &([1000 * i32]* %7)[i64 84]; - i32* %346 = getelementptr &([1000 * i32]* %345)[i64 0][i32 %4]; - i32 %347 = load i32* %346; - i32* %348 = getelementptr &(i32* %10)[i64 84]; - store i32* %348 with i32 %347; - [1000 * i32]* %349 = getelementptr &([1000 * i32]* %7)[i64 85]; - i32* %350 = getelementptr &([1000 * i32]* %349)[i64 0][i32 %4]; - i32 %351 = load i32* %350; - i32* %352 = getelementptr &(i32* %10)[i64 85]; - store i32* %352 with i32 %351; - [1000 * i32]* %353 = getelementptr &([1000 * i32]* %7)[i64 86]; + store i32* %342 with i32 %340; + [1000 * i32]* %343 = getelementptr &([1000 * i32]* %6)[i64 67]; + i32* %344 = getelementptr &([1000 * i32]* %343)[i64 0][i32 %4]; + i32 %345 = load i32* %344; + [1000 * i32]* %346 = getelementptr &([1000 * i32]* %9)[i64 67]; + i32* %347 = getelementptr &([1000 * i32]* %346)[i64 0][i32 %4]; + store i32* %347 with i32 %345; + [1000 * i32]* %348 = getelementptr &([1000 * i32]* %6)[i64 68]; + i32* %349 = getelementptr &([1000 * i32]* %348)[i64 0][i32 %4]; + i32 %350 = load i32* %349; + [1000 * i32]* %351 = getelementptr &([1000 * i32]* %9)[i64 68]; + i32* %352 = getelementptr &([1000 * i32]* %351)[i64 0][i32 %4]; + store i32* %352 with i32 %350; + [1000 * i32]* %353 = getelementptr &([1000 * i32]* %6)[i64 69]; i32* %354 = getelementptr &([1000 * i32]* %353)[i64 0][i32 %4]; i32 %355 = load i32* %354; - i32* %356 = getelementptr &(i32* %10)[i64 86]; - store i32* %356 with i32 %355; - [1000 * i32]* %357 = getelementptr &([1000 * i32]* %7)[i64 87]; - i32* %358 = getelementptr &([1000 * i32]* %357)[i64 0][i32 %4]; - i32 %359 = load i32* %358; - i32* %360 = getelementptr &(i32* %10)[i64 87]; - store i32* %360 with i32 %359; - [1000 * i32]* %361 = getelementptr &([1000 * i32]* %7)[i64 88]; + [1000 * i32]* %356 = getelementptr &([1000 * i32]* %9)[i64 69]; + i32* %357 = getelementptr &([1000 * i32]* %356)[i64 0][i32 %4]; + store i32* %357 with i32 %355; + [1000 * i32]* %358 = getelementptr &([1000 * i32]* %6)[i64 70]; + i32* %359 = getelementptr &([1000 * i32]* %358)[i64 0][i32 %4]; + i32 %360 = load i32* %359; + [1000 * i32]* %361 = getelementptr &([1000 * i32]* %9)[i64 70]; i32* %362 = getelementptr &([1000 * i32]* %361)[i64 0][i32 %4]; - i32 %363 = load i32* %362; - i32* %364 = getelementptr &(i32* %10)[i64 88]; - store i32* %364 with i32 %363; - [1000 * i32]* %365 = getelementptr &([1000 * i32]* %7)[i64 89]; - i32* %366 = getelementptr &([1000 * i32]* %365)[i64 0][i32 %4]; - i32 %367 = load i32* %366; - i32* %368 = getelementptr &(i32* %10)[i64 89]; - store i32* %368 with i32 %367; - [1000 * i32]* %369 = getelementptr &([1000 * i32]* %7)[i64 90]; - i32* %370 = getelementptr &([1000 * i32]* %369)[i64 0][i32 %4]; - i32 %371 = load i32* %370; - i32* %372 = getelementptr &(i32* %10)[i64 90]; - store i32* %372 with i32 %371; - [1000 * i32]* %373 = getelementptr &([1000 * i32]* %7)[i64 91]; + store i32* %362 with i32 %360; + [1000 * i32]* %363 = getelementptr &([1000 * i32]* %6)[i64 71]; + i32* %364 = getelementptr &([1000 * i32]* %363)[i64 0][i32 %4]; + i32 %365 = load i32* %364; + [1000 * i32]* %366 = getelementptr &([1000 * i32]* %9)[i64 71]; + i32* %367 = getelementptr &([1000 * i32]* %366)[i64 0][i32 %4]; + store i32* %367 with i32 %365; + [1000 * i32]* %368 = getelementptr &([1000 * i32]* %6)[i64 72]; + i32* %369 = getelementptr &([1000 * i32]* %368)[i64 0][i32 %4]; + i32 %370 = load i32* %369; + [1000 * i32]* %371 = getelementptr &([1000 * i32]* %9)[i64 72]; + i32* %372 = getelementptr &([1000 * i32]* %371)[i64 0][i32 %4]; + store i32* %372 with i32 %370; + [1000 * i32]* %373 = getelementptr &([1000 * i32]* %6)[i64 73]; i32* %374 = getelementptr &([1000 * i32]* %373)[i64 0][i32 %4]; i32 %375 = load i32* %374; - i32* %376 = getelementptr &(i32* %10)[i64 91]; - store i32* %376 with i32 %375; - [1000 * i32]* %377 = getelementptr &([1000 * i32]* %7)[i64 92]; - i32* %378 = getelementptr &([1000 * i32]* %377)[i64 0][i32 %4]; - i32 %379 = load i32* %378; - i32* %380 = getelementptr &(i32* %10)[i64 92]; - store i32* %380 with i32 %379; - [1000 * i32]* %381 = getelementptr &([1000 * i32]* %7)[i64 93]; + [1000 * i32]* %376 = getelementptr &([1000 * i32]* %9)[i64 73]; + i32* %377 = getelementptr &([1000 * i32]* %376)[i64 0][i32 %4]; + store i32* %377 with i32 %375; + [1000 * i32]* %378 = getelementptr &([1000 * i32]* %6)[i64 74]; + i32* %379 = getelementptr &([1000 * i32]* %378)[i64 0][i32 %4]; + i32 %380 = load i32* %379; + [1000 * i32]* %381 = getelementptr &([1000 * i32]* %9)[i64 74]; i32* %382 = getelementptr &([1000 * i32]* %381)[i64 0][i32 %4]; - i32 %383 = load i32* %382; - i32* %384 = getelementptr &(i32* %10)[i64 93]; - store i32* %384 with i32 %383; - [1000 * i32]* %385 = getelementptr &([1000 * i32]* %7)[i64 94]; - i32* %386 = getelementptr &([1000 * i32]* %385)[i64 0][i32 %4]; - i32 %387 = load i32* %386; - i32* %388 = getelementptr &(i32* %10)[i64 94]; - store i32* %388 with i32 %387; - [1000 * i32]* %389 = getelementptr &([1000 * i32]* %7)[i64 95]; - i32* %390 = getelementptr &([1000 * i32]* %389)[i64 0][i32 %4]; - i32 %391 = load i32* %390; - i32* %392 = getelementptr &(i32* %10)[i64 95]; - store i32* %392 with i32 %391; - [1000 * i32]* %393 = getelementptr &([1000 * i32]* %7)[i64 96]; + store i32* %382 with i32 %380; + [1000 * i32]* %383 = getelementptr &([1000 * i32]* %6)[i64 75]; + i32* %384 = getelementptr &([1000 * i32]* %383)[i64 0][i32 %4]; + i32 %385 = load i32* %384; + [1000 * i32]* %386 = getelementptr &([1000 * i32]* %9)[i64 75]; + i32* %387 = getelementptr &([1000 * i32]* %386)[i64 0][i32 %4]; + store i32* %387 with i32 %385; + [1000 * i32]* %388 = getelementptr &([1000 * i32]* %6)[i64 76]; + i32* %389 = getelementptr &([1000 * i32]* %388)[i64 0][i32 %4]; + i32 %390 = load i32* %389; + [1000 * i32]* %391 = getelementptr &([1000 * i32]* %9)[i64 76]; + i32* %392 = getelementptr &([1000 * i32]* %391)[i64 0][i32 %4]; + store i32* %392 with i32 %390; + [1000 * i32]* %393 = getelementptr &([1000 * i32]* %6)[i64 77]; i32* %394 = getelementptr &([1000 * i32]* %393)[i64 0][i32 %4]; i32 %395 = load i32* %394; - i32* %396 = getelementptr &(i32* %10)[i64 96]; - store i32* %396 with i32 %395; - [1000 * i32]* %397 = getelementptr &([1000 * i32]* %7)[i64 97]; - i32* %398 = getelementptr &([1000 * i32]* %397)[i64 0][i32 %4]; - i32 %399 = load i32* %398; - i32* %400 = getelementptr &(i32* %10)[i64 97]; - store i32* %400 with i32 %399; - [1000 * i32]* %401 = getelementptr &([1000 * i32]* %7)[i64 98]; + [1000 * i32]* %396 = getelementptr &([1000 * i32]* %9)[i64 77]; + i32* %397 = getelementptr &([1000 * i32]* %396)[i64 0][i32 %4]; + store i32* %397 with i32 %395; + [1000 * i32]* %398 = getelementptr &([1000 * i32]* %6)[i64 78]; + i32* %399 = getelementptr &([1000 * i32]* %398)[i64 0][i32 %4]; + i32 %400 = load i32* %399; + [1000 * i32]* %401 = getelementptr &([1000 * i32]* %9)[i64 78]; i32* %402 = getelementptr &([1000 * i32]* %401)[i64 0][i32 %4]; - i32 %403 = load i32* %402; - i32* %404 = getelementptr &(i32* %10)[i64 98]; - store i32* %404 with i32 %403; - [1000 * i32]* %405 = getelementptr &([1000 * i32]* %7)[i64 99]; - i32* %406 = getelementptr &([1000 * i32]* %405)[i64 0][i32 %4]; - i32 %407 = load i32* %406; - i32* %408 = getelementptr &(i32* %10)[i64 99]; - store i32* %408 with i32 %407; - [1000 * i32]* %409 = getelementptr &([1000 * i32]* %7)[i64 100]; - i32* %410 = getelementptr &([1000 * i32]* %409)[i64 0][i32 %4]; - i32 %411 = load i32* %410; - i32* %412 = getelementptr &(i32* %10)[i64 100]; - store i32* %412 with i32 %411; - [1000 * i32]* %413 = getelementptr &([1000 * i32]* %7)[i64 101]; + store i32* %402 with i32 %400; + [1000 * i32]* %403 = getelementptr &([1000 * i32]* %6)[i64 79]; + i32* %404 = getelementptr &([1000 * i32]* %403)[i64 0][i32 %4]; + i32 %405 = load i32* %404; + [1000 * i32]* %406 = getelementptr &([1000 * i32]* %9)[i64 79]; + i32* %407 = getelementptr &([1000 * i32]* %406)[i64 0][i32 %4]; + store i32* %407 with i32 %405; + [1000 * i32]* %408 = getelementptr &([1000 * i32]* %6)[i64 80]; + i32* %409 = getelementptr &([1000 * i32]* %408)[i64 0][i32 %4]; + i32 %410 = load i32* %409; + [1000 * i32]* %411 = getelementptr &([1000 * i32]* %9)[i64 80]; + i32* %412 = getelementptr &([1000 * i32]* %411)[i64 0][i32 %4]; + store i32* %412 with i32 %410; + [1000 * i32]* %413 = getelementptr &([1000 * i32]* %6)[i64 81]; i32* %414 = getelementptr &([1000 * i32]* %413)[i64 0][i32 %4]; i32 %415 = load i32* %414; - i32* %416 = getelementptr &(i32* %10)[i64 101]; - store i32* %416 with i32 %415; - [1000 * i32]* %417 = getelementptr &([1000 * i32]* %7)[i64 102]; - i32* %418 = getelementptr &([1000 * i32]* %417)[i64 0][i32 %4]; - i32 %419 = load i32* %418; - i32* %420 = getelementptr &(i32* %10)[i64 102]; - store i32* %420 with i32 %419; - [1000 * i32]* %421 = getelementptr &([1000 * i32]* %7)[i64 103]; + [1000 * i32]* %416 = getelementptr &([1000 * i32]* %9)[i64 81]; + i32* %417 = getelementptr &([1000 * i32]* %416)[i64 0][i32 %4]; + store i32* %417 with i32 %415; + [1000 * i32]* %418 = getelementptr &([1000 * i32]* %6)[i64 82]; + i32* %419 = getelementptr &([1000 * i32]* %418)[i64 0][i32 %4]; + i32 %420 = load i32* %419; + [1000 * i32]* %421 = getelementptr &([1000 * i32]* %9)[i64 82]; i32* %422 = getelementptr &([1000 * i32]* %421)[i64 0][i32 %4]; - i32 %423 = load i32* %422; - i32* %424 = getelementptr &(i32* %10)[i64 103]; - store i32* %424 with i32 %423; - i32 %425 = add i32 %4, i32 1; - i1 %426 = icmp sgt i32 %1, i32 %425; - cbr i1 %426(prob = 0.984615), ^b1, ^b2; + store i32* %422 with i32 %420; + [1000 * i32]* %423 = getelementptr &([1000 * i32]* %6)[i64 83]; + i32* %424 = getelementptr &([1000 * i32]* %423)[i64 0][i32 %4]; + i32 %425 = load i32* %424; + [1000 * i32]* %426 = getelementptr &([1000 * i32]* %9)[i64 83]; + i32* %427 = getelementptr &([1000 * i32]* %426)[i64 0][i32 %4]; + store i32* %427 with i32 %425; + [1000 * i32]* %428 = getelementptr &([1000 * i32]* %6)[i64 84]; + i32* %429 = getelementptr &([1000 * i32]* %428)[i64 0][i32 %4]; + i32 %430 = load i32* %429; + [1000 * i32]* %431 = getelementptr &([1000 * i32]* %9)[i64 84]; + i32* %432 = getelementptr &([1000 * i32]* %431)[i64 0][i32 %4]; + store i32* %432 with i32 %430; + [1000 * i32]* %433 = getelementptr &([1000 * i32]* %6)[i64 85]; + i32* %434 = getelementptr &([1000 * i32]* %433)[i64 0][i32 %4]; + i32 %435 = load i32* %434; + [1000 * i32]* %436 = getelementptr &([1000 * i32]* %9)[i64 85]; + i32* %437 = getelementptr &([1000 * i32]* %436)[i64 0][i32 %4]; + store i32* %437 with i32 %435; + [1000 * i32]* %438 = getelementptr &([1000 * i32]* %6)[i64 86]; + i32* %439 = getelementptr &([1000 * i32]* %438)[i64 0][i32 %4]; + i32 %440 = load i32* %439; + [1000 * i32]* %441 = getelementptr &([1000 * i32]* %9)[i64 86]; + i32* %442 = getelementptr &([1000 * i32]* %441)[i64 0][i32 %4]; + store i32* %442 with i32 %440; + [1000 * i32]* %443 = getelementptr &([1000 * i32]* %6)[i64 87]; + i32* %444 = getelementptr &([1000 * i32]* %443)[i64 0][i32 %4]; + i32 %445 = load i32* %444; + [1000 * i32]* %446 = getelementptr &([1000 * i32]* %9)[i64 87]; + i32* %447 = getelementptr &([1000 * i32]* %446)[i64 0][i32 %4]; + store i32* %447 with i32 %445; + [1000 * i32]* %448 = getelementptr &([1000 * i32]* %6)[i64 88]; + i32* %449 = getelementptr &([1000 * i32]* %448)[i64 0][i32 %4]; + i32 %450 = load i32* %449; + [1000 * i32]* %451 = getelementptr &([1000 * i32]* %9)[i64 88]; + i32* %452 = getelementptr &([1000 * i32]* %451)[i64 0][i32 %4]; + store i32* %452 with i32 %450; + [1000 * i32]* %453 = getelementptr &([1000 * i32]* %6)[i64 89]; + i32* %454 = getelementptr &([1000 * i32]* %453)[i64 0][i32 %4]; + i32 %455 = load i32* %454; + [1000 * i32]* %456 = getelementptr &([1000 * i32]* %9)[i64 89]; + i32* %457 = getelementptr &([1000 * i32]* %456)[i64 0][i32 %4]; + store i32* %457 with i32 %455; + [1000 * i32]* %458 = getelementptr &([1000 * i32]* %6)[i64 90]; + i32* %459 = getelementptr &([1000 * i32]* %458)[i64 0][i32 %4]; + i32 %460 = load i32* %459; + [1000 * i32]* %461 = getelementptr &([1000 * i32]* %9)[i64 90]; + i32* %462 = getelementptr &([1000 * i32]* %461)[i64 0][i32 %4]; + store i32* %462 with i32 %460; + [1000 * i32]* %463 = getelementptr &([1000 * i32]* %6)[i64 91]; + i32* %464 = getelementptr &([1000 * i32]* %463)[i64 0][i32 %4]; + i32 %465 = load i32* %464; + [1000 * i32]* %466 = getelementptr &([1000 * i32]* %9)[i64 91]; + i32* %467 = getelementptr &([1000 * i32]* %466)[i64 0][i32 %4]; + store i32* %467 with i32 %465; + [1000 * i32]* %468 = getelementptr &([1000 * i32]* %6)[i64 92]; + i32* %469 = getelementptr &([1000 * i32]* %468)[i64 0][i32 %4]; + i32 %470 = load i32* %469; + [1000 * i32]* %471 = getelementptr &([1000 * i32]* %9)[i64 92]; + i32* %472 = getelementptr &([1000 * i32]* %471)[i64 0][i32 %4]; + store i32* %472 with i32 %470; + [1000 * i32]* %473 = getelementptr &([1000 * i32]* %6)[i64 93]; + i32* %474 = getelementptr &([1000 * i32]* %473)[i64 0][i32 %4]; + i32 %475 = load i32* %474; + [1000 * i32]* %476 = getelementptr &([1000 * i32]* %9)[i64 93]; + i32* %477 = getelementptr &([1000 * i32]* %476)[i64 0][i32 %4]; + store i32* %477 with i32 %475; + [1000 * i32]* %478 = getelementptr &([1000 * i32]* %6)[i64 94]; + i32* %479 = getelementptr &([1000 * i32]* %478)[i64 0][i32 %4]; + i32 %480 = load i32* %479; + [1000 * i32]* %481 = getelementptr &([1000 * i32]* %9)[i64 94]; + i32* %482 = getelementptr &([1000 * i32]* %481)[i64 0][i32 %4]; + store i32* %482 with i32 %480; + [1000 * i32]* %483 = getelementptr &([1000 * i32]* %6)[i64 95]; + i32* %484 = getelementptr &([1000 * i32]* %483)[i64 0][i32 %4]; + i32 %485 = load i32* %484; + [1000 * i32]* %486 = getelementptr &([1000 * i32]* %9)[i64 95]; + i32* %487 = getelementptr &([1000 * i32]* %486)[i64 0][i32 %4]; + store i32* %487 with i32 %485; + [1000 * i32]* %488 = getelementptr &([1000 * i32]* %6)[i64 96]; + i32* %489 = getelementptr &([1000 * i32]* %488)[i64 0][i32 %4]; + i32 %490 = load i32* %489; + [1000 * i32]* %491 = getelementptr &([1000 * i32]* %9)[i64 96]; + i32* %492 = getelementptr &([1000 * i32]* %491)[i64 0][i32 %4]; + store i32* %492 with i32 %490; + [1000 * i32]* %493 = getelementptr &([1000 * i32]* %6)[i64 97]; + i32* %494 = getelementptr &([1000 * i32]* %493)[i64 0][i32 %4]; + i32 %495 = load i32* %494; + [1000 * i32]* %496 = getelementptr &([1000 * i32]* %9)[i64 97]; + i32* %497 = getelementptr &([1000 * i32]* %496)[i64 0][i32 %4]; + store i32* %497 with i32 %495; + [1000 * i32]* %498 = getelementptr &([1000 * i32]* %6)[i64 98]; + i32* %499 = getelementptr &([1000 * i32]* %498)[i64 0][i32 %4]; + i32 %500 = load i32* %499; + [1000 * i32]* %501 = getelementptr &([1000 * i32]* %9)[i64 98]; + i32* %502 = getelementptr &([1000 * i32]* %501)[i64 0][i32 %4]; + store i32* %502 with i32 %500; + [1000 * i32]* %503 = getelementptr &([1000 * i32]* %6)[i64 99]; + i32* %504 = getelementptr &([1000 * i32]* %503)[i64 0][i32 %4]; + i32 %505 = load i32* %504; + [1000 * i32]* %506 = getelementptr &([1000 * i32]* %9)[i64 99]; + i32* %507 = getelementptr &([1000 * i32]* %506)[i64 0][i32 %4]; + store i32* %507 with i32 %505; + [1000 * i32]* %508 = getelementptr &([1000 * i32]* %6)[i64 100]; + i32* %509 = getelementptr &([1000 * i32]* %508)[i64 0][i32 %4]; + i32 %510 = load i32* %509; + [1000 * i32]* %511 = getelementptr &([1000 * i32]* %9)[i64 100]; + i32* %512 = getelementptr &([1000 * i32]* %511)[i64 0][i32 %4]; + store i32* %512 with i32 %510; + [1000 * i32]* %513 = getelementptr &([1000 * i32]* %6)[i64 101]; + i32* %514 = getelementptr &([1000 * i32]* %513)[i64 0][i32 %4]; + i32 %515 = load i32* %514; + [1000 * i32]* %516 = getelementptr &([1000 * i32]* %9)[i64 101]; + i32* %517 = getelementptr &([1000 * i32]* %516)[i64 0][i32 %4]; + store i32* %517 with i32 %515; + [1000 * i32]* %518 = getelementptr &([1000 * i32]* %6)[i64 102]; + i32* %519 = getelementptr &([1000 * i32]* %518)[i64 0][i32 %4]; + i32 %520 = load i32* %519; + [1000 * i32]* %521 = getelementptr &([1000 * i32]* %9)[i64 102]; + i32* %522 = getelementptr &([1000 * i32]* %521)[i64 0][i32 %4]; + store i32* %522 with i32 %520; + [1000 * i32]* %523 = getelementptr &([1000 * i32]* %6)[i64 103]; + i32* %524 = getelementptr &([1000 * i32]* %523)[i64 0][i32 %4]; + i32 %525 = load i32* %524; + [1000 * i32]* %526 = getelementptr &([1000 * i32]* %9)[i64 103]; + i32* %527 = getelementptr &([1000 * i32]* %526)[i64 0][i32 %4]; + store i32* %527 with i32 %525; + i32 %528 = add i32 %4, i32 1; + i1 %529 = icmp sgt i32 %1, i32 %528; + cbr i1 %529(prob = 0.984615), ^b1, ^b2; ^b2: ret; } internal func @cmmc_parallel_body_1(i32 %0, i32 %1) -> void { NoRecurse ParallelBody AlignedParallelBody } { ^b: - [1000 * [1000 * i32]]* %2 = ptrcast [1000 * [1000 * i32]]* @b to [1000 * [1000 * i32]]*; - [1000 * [1000 * i32]]* %3 = ptrcast [1000 * [1000 * i32]]* @c to [1000 * [1000 * i32]]*; - [1000 * [1000 * i32]]* %4 = ptrcast [1000 * [1000 * i32]]* @a to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %2 = ptrcast [1000 * [1000 * i32]]* @c to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %3 = ptrcast [1000 * [1000 * i32]]* @a to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %4 = ptrcast [1000 * [1000 * i32]]* @b to [1000 * [1000 * i32]]*; ubr ^b1; ^b1: - i32 %5 = phi [^b, i32 %0] [^b2, i32 %184]; - [1000 * i32]* %6 = getelementptr &([1000 * [1000 * i32]]* %3)[i64 0][i32 %5]; - [1000 * i32]* %7 = getelementptr &([1000 * [1000 * i32]]* %4)[i64 0][i32 %5]; + i32 %5 = phi [^b, i32 %0] [^b2, i32 %161]; + [1000 * i32]* %6 = getelementptr &([1000 * [1000 * i32]]* %2)[i64 0][i32 %5]; + [1000 * i32]* %7 = getelementptr &([1000 * [1000 * i32]]* %3)[i64 0][i32 %5]; ubr ^while.body; ^while.body: - i32 %8 = phi [^b1, i32 0] [^while.body2, i32 %182]; + i32 %8 = phi [^b1, i32 0] [^while.body2, i32 %159]; + [1000 * i32]* %9 = getelementptr &([1000 * [1000 * i32]]* %4)[i64 0][i32 %8]; ubr ^while.body1; ^while.body1: - i32 %9 = phi [^while.body, i32 0] [^while.body1, i32 %123]; - i32 %10 = phi [^while.body, i32 0] [^while.body1, i32 %122]; - [1000 * i32]* %11 = getelementptr &([1000 * [1000 * i32]]* %2)[i64 0][i32 %9]; - i32* %12 = getelementptr &([1000 * i32]* %11)[i64 0][i32 %8]; - i32* %13 = getelementptr &([1000 * i32]* %7)[i64 0][i32 %9]; - i32 %14 = load i32* %13; - i32 %15 = load i32* %12; - [1000 * i32]* %16 = getelementptr &([1000 * i32]* %11)[i64 1]; - i32* %17 = getelementptr &([1000 * i32]* %16)[i64 0][i32 %8]; - i32* %18 = getelementptr &(i32* %13)[i64 1]; + i32 %10 = phi [^while.body, i32 0] [^while.body1, i32 %108]; + i32 %11 = phi [^while.body, i32 0] [^while.body1, i32 %107]; + i32* %12 = getelementptr &([1000 * i32]* %7)[i64 0][i32 %10]; + i32 %13 = load i32* %12; + i32* %14 = getelementptr &([1000 * i32]* %9)[i64 0][i32 %10]; + i32 %15 = load i32* %14; + i32* %16 = getelementptr &(i32* %12)[i64 1]; + i32 %17 = load i32* %16; + i32* %18 = getelementptr &(i32* %14)[i64 1]; i32 %19 = load i32* %18; - i32 %20 = load i32* %17; - i32 %21 = mul i32 %19, i32 %20; - i32 %22 = mul i32 %14, i32 %15; - i32 %23 = add i32 %21, i32 %22; - [1000 * i32]* %24 = getelementptr &([1000 * i32]* %11)[i64 2]; - i32* %25 = getelementptr &([1000 * i32]* %24)[i64 0][i32 %8]; - i32* %26 = getelementptr &(i32* %13)[i64 2]; - i32 %27 = load i32* %26; - i32 %28 = load i32* %25; - i32 %29 = mul i32 %27, i32 %28; - i32 %30 = add i32 %23, i32 %29; - [1000 * i32]* %31 = getelementptr &([1000 * i32]* %11)[i64 3]; - i32* %32 = getelementptr &([1000 * i32]* %31)[i64 0][i32 %8]; - i32* %33 = getelementptr &(i32* %13)[i64 3]; - i32 %34 = load i32* %33; - i32 %35 = load i32* %32; - i32 %36 = mul i32 %34, i32 %35; - i32 %37 = add i32 %30, i32 %36; - [1000 * i32]* %38 = getelementptr &([1000 * i32]* %11)[i64 4]; - i32* %39 = getelementptr &([1000 * i32]* %38)[i64 0][i32 %8]; - i32* %40 = getelementptr &(i32* %13)[i64 4]; - i32 %41 = load i32* %40; - i32 %42 = load i32* %39; - i32 %43 = mul i32 %41, i32 %42; - i32 %44 = add i32 %37, i32 %43; - [1000 * i32]* %45 = getelementptr &([1000 * i32]* %11)[i64 5]; - i32* %46 = getelementptr &([1000 * i32]* %45)[i64 0][i32 %8]; - i32* %47 = getelementptr &(i32* %13)[i64 5]; + i32 %20 = mul i32 %17, i32 %19; + i32 %21 = mul i32 %13, i32 %15; + i32 %22 = add i32 %20, i32 %21; + i32* %23 = getelementptr &(i32* %12)[i64 2]; + i32 %24 = load i32* %23; + i32* %25 = getelementptr &(i32* %14)[i64 2]; + i32 %26 = load i32* %25; + i32 %27 = mul i32 %24, i32 %26; + i32 %28 = add i32 %22, i32 %27; + i32* %29 = getelementptr &(i32* %12)[i64 3]; + i32 %30 = load i32* %29; + i32* %31 = getelementptr &(i32* %14)[i64 3]; + i32 %32 = load i32* %31; + i32 %33 = mul i32 %30, i32 %32; + i32 %34 = add i32 %28, i32 %33; + i32* %35 = getelementptr &(i32* %12)[i64 4]; + i32 %36 = load i32* %35; + i32* %37 = getelementptr &(i32* %14)[i64 4]; + i32 %38 = load i32* %37; + i32 %39 = mul i32 %36, i32 %38; + i32 %40 = add i32 %34, i32 %39; + i32* %41 = getelementptr &(i32* %12)[i64 5]; + i32 %42 = load i32* %41; + i32* %43 = getelementptr &(i32* %14)[i64 5]; + i32 %44 = load i32* %43; + i32 %45 = mul i32 %42, i32 %44; + i32 %46 = add i32 %40, i32 %45; + i32* %47 = getelementptr &(i32* %12)[i64 6]; i32 %48 = load i32* %47; - i32 %49 = load i32* %46; - i32 %50 = mul i32 %48, i32 %49; - i32 %51 = add i32 %44, i32 %50; - [1000 * i32]* %52 = getelementptr &([1000 * i32]* %11)[i64 6]; - i32* %53 = getelementptr &([1000 * i32]* %52)[i64 0][i32 %8]; - i32* %54 = getelementptr &(i32* %13)[i64 6]; - i32 %55 = load i32* %54; - i32 %56 = load i32* %53; - i32 %57 = mul i32 %55, i32 %56; - i32 %58 = add i32 %51, i32 %57; - [1000 * i32]* %59 = getelementptr &([1000 * i32]* %11)[i64 7]; - i32* %60 = getelementptr &([1000 * i32]* %59)[i64 0][i32 %8]; - i32* %61 = getelementptr &(i32* %13)[i64 7]; + i32* %49 = getelementptr &(i32* %14)[i64 6]; + i32 %50 = load i32* %49; + i32 %51 = mul i32 %48, i32 %50; + i32 %52 = add i32 %46, i32 %51; + i32* %53 = getelementptr &(i32* %12)[i64 7]; + i32 %54 = load i32* %53; + i32* %55 = getelementptr &(i32* %14)[i64 7]; + i32 %56 = load i32* %55; + i32 %57 = mul i32 %54, i32 %56; + i32 %58 = add i32 %52, i32 %57; + i32* %59 = getelementptr &(i32* %12)[i64 8]; + i32 %60 = load i32* %59; + i32* %61 = getelementptr &(i32* %14)[i64 8]; i32 %62 = load i32* %61; - i32 %63 = load i32* %60; - i32 %64 = mul i32 %62, i32 %63; - i32 %65 = add i32 %58, i32 %64; - [1000 * i32]* %66 = getelementptr &([1000 * i32]* %11)[i64 8]; - i32* %67 = getelementptr &([1000 * i32]* %66)[i64 0][i32 %8]; - i32* %68 = getelementptr &(i32* %13)[i64 8]; - i32 %69 = load i32* %68; - i32 %70 = load i32* %67; - i32 %71 = mul i32 %69, i32 %70; - i32 %72 = add i32 %65, i32 %71; - [1000 * i32]* %73 = getelementptr &([1000 * i32]* %11)[i64 9]; - i32* %74 = getelementptr &([1000 * i32]* %73)[i64 0][i32 %8]; - i32* %75 = getelementptr &(i32* %13)[i64 9]; - i32 %76 = load i32* %75; - i32 %77 = load i32* %74; - i32 %78 = mul i32 %76, i32 %77; - i32 %79 = add i32 %72, i32 %78; - [1000 * i32]* %80 = getelementptr &([1000 * i32]* %11)[i64 10]; - i32* %81 = getelementptr &([1000 * i32]* %80)[i64 0][i32 %8]; - i32* %82 = getelementptr &(i32* %13)[i64 10]; - i32 %83 = load i32* %82; - i32 %84 = load i32* %81; - i32 %85 = mul i32 %83, i32 %84; - i32 %86 = add i32 %79, i32 %85; - [1000 * i32]* %87 = getelementptr &([1000 * i32]* %11)[i64 11]; - i32* %88 = getelementptr &([1000 * i32]* %87)[i64 0][i32 %8]; - i32* %89 = getelementptr &(i32* %13)[i64 11]; + i32 %63 = mul i32 %60, i32 %62; + i32 %64 = add i32 %58, i32 %63; + i32* %65 = getelementptr &(i32* %12)[i64 9]; + i32 %66 = load i32* %65; + i32* %67 = getelementptr &(i32* %14)[i64 9]; + i32 %68 = load i32* %67; + i32 %69 = mul i32 %66, i32 %68; + i32 %70 = add i32 %64, i32 %69; + i32* %71 = getelementptr &(i32* %12)[i64 10]; + i32 %72 = load i32* %71; + i32* %73 = getelementptr &(i32* %14)[i64 10]; + i32 %74 = load i32* %73; + i32 %75 = mul i32 %72, i32 %74; + i32 %76 = add i32 %70, i32 %75; + i32* %77 = getelementptr &(i32* %12)[i64 11]; + i32 %78 = load i32* %77; + i32* %79 = getelementptr &(i32* %14)[i64 11]; + i32 %80 = load i32* %79; + i32 %81 = mul i32 %78, i32 %80; + i32 %82 = add i32 %76, i32 %81; + i32* %83 = getelementptr &(i32* %12)[i64 12]; + i32 %84 = load i32* %83; + i32* %85 = getelementptr &(i32* %14)[i64 12]; + i32 %86 = load i32* %85; + i32 %87 = mul i32 %84, i32 %86; + i32 %88 = add i32 %82, i32 %87; + i32* %89 = getelementptr &(i32* %12)[i64 13]; i32 %90 = load i32* %89; - i32 %91 = load i32* %88; - i32 %92 = mul i32 %90, i32 %91; - i32 %93 = add i32 %86, i32 %92; - [1000 * i32]* %94 = getelementptr &([1000 * i32]* %11)[i64 12]; - i32* %95 = getelementptr &([1000 * i32]* %94)[i64 0][i32 %8]; - i32* %96 = getelementptr &(i32* %13)[i64 12]; - i32 %97 = load i32* %96; - i32 %98 = load i32* %95; - i32 %99 = mul i32 %97, i32 %98; - i32 %100 = add i32 %93, i32 %99; - [1000 * i32]* %101 = getelementptr &([1000 * i32]* %11)[i64 13]; - i32* %102 = getelementptr &([1000 * i32]* %101)[i64 0][i32 %8]; - i32* %103 = getelementptr &(i32* %13)[i64 13]; + i32* %91 = getelementptr &(i32* %14)[i64 13]; + i32 %92 = load i32* %91; + i32 %93 = mul i32 %90, i32 %92; + i32 %94 = add i32 %88, i32 %93; + i32* %95 = getelementptr &(i32* %12)[i64 14]; + i32 %96 = load i32* %95; + i32* %97 = getelementptr &(i32* %14)[i64 14]; + i32 %98 = load i32* %97; + i32 %99 = mul i32 %96, i32 %98; + i32 %100 = add i32 %94, i32 %99; + i32* %101 = getelementptr &(i32* %12)[i64 15]; + i32 %102 = load i32* %101; + i32* %103 = getelementptr &(i32* %14)[i64 15]; i32 %104 = load i32* %103; - i32 %105 = load i32* %102; - i32 %106 = mul i32 %104, i32 %105; - i32 %107 = add i32 %100, i32 %106; - [1000 * i32]* %108 = getelementptr &([1000 * i32]* %11)[i64 14]; - i32* %109 = getelementptr &([1000 * i32]* %108)[i64 0][i32 %8]; - i32* %110 = getelementptr &(i32* %13)[i64 14]; - i32 %111 = load i32* %110; - i32 %112 = load i32* %109; - i32 %113 = mul i32 %111, i32 %112; - i32 %114 = add i32 %107, i32 %113; - [1000 * i32]* %115 = getelementptr &([1000 * i32]* %11)[i64 15]; - i32* %116 = getelementptr &([1000 * i32]* %115)[i64 0][i32 %8]; - i32* %117 = getelementptr &(i32* %13)[i64 15]; - i32 %118 = load i32* %117; - i32 %119 = load i32* %116; - i32 %120 = mul i32 %118, i32 %119; - i32 %121 = add i32 %114, i32 %120; - i32 %122 = add i32 %10, i32 %121; - i32 %123 = add i32 %9, i32 16; - i1 %124 = icmp slt i32 %123, i32 992; - cbr i1 %124(prob = 0.983871), ^while.body1, ^while.body2; + i32 %105 = mul i32 %102, i32 %104; + i32 %106 = add i32 %100, i32 %105; + i32 %107 = add i32 %11, i32 %106; + i32 %108 = add i32 %10, i32 16; + i1 %109 = icmp slt i32 %108, i32 992; + cbr i1 %109(prob = 0.983871), ^while.body1, ^while.body2; ^while.body2: - [1000 * i32]* %125 = getelementptr &([1000 * i32]* %11)[i64 16]; - i32* %126 = getelementptr &([1000 * i32]* %125)[i64 0][i32 %8]; - i32* %127 = getelementptr &(i32* %13)[i64 16]; - i32 %128 = load i32* %127; - i32 %129 = load i32* %126; - i32 %130 = mul i32 %128, i32 %129; - i32 %131 = add i32 %122, i32 %130; - [1000 * i32]* %132 = getelementptr &([1000 * i32]* %11)[i64 17]; - i32* %133 = getelementptr &([1000 * i32]* %132)[i64 0][i32 %8]; - i32* %134 = getelementptr &(i32* %13)[i64 17]; + i32* %110 = getelementptr &(i32* %12)[i64 16]; + i32 %111 = load i32* %110; + i32* %112 = getelementptr &(i32* %14)[i64 16]; + i32 %113 = load i32* %112; + i32 %114 = mul i32 %111, i32 %113; + i32 %115 = add i32 %107, i32 %114; + i32* %116 = getelementptr &(i32* %12)[i64 17]; + i32 %117 = load i32* %116; + i32* %118 = getelementptr &(i32* %14)[i64 17]; + i32 %119 = load i32* %118; + i32 %120 = mul i32 %117, i32 %119; + i32 %121 = add i32 %115, i32 %120; + i32* %122 = getelementptr &(i32* %12)[i64 18]; + i32 %123 = load i32* %122; + i32* %124 = getelementptr &(i32* %14)[i64 18]; + i32 %125 = load i32* %124; + i32 %126 = mul i32 %123, i32 %125; + i32 %127 = add i32 %121, i32 %126; + i32* %128 = getelementptr &(i32* %12)[i64 19]; + i32 %129 = load i32* %128; + i32* %130 = getelementptr &(i32* %14)[i64 19]; + i32 %131 = load i32* %130; + i32 %132 = mul i32 %129, i32 %131; + i32 %133 = add i32 %127, i32 %132; + i32* %134 = getelementptr &(i32* %12)[i64 20]; i32 %135 = load i32* %134; - i32 %136 = load i32* %133; - i32 %137 = mul i32 %135, i32 %136; - i32 %138 = add i32 %131, i32 %137; - [1000 * i32]* %139 = getelementptr &([1000 * i32]* %11)[i64 18]; - i32* %140 = getelementptr &([1000 * i32]* %139)[i64 0][i32 %8]; - i32* %141 = getelementptr &(i32* %13)[i64 18]; - i32 %142 = load i32* %141; - i32 %143 = load i32* %140; - i32 %144 = mul i32 %142, i32 %143; - i32 %145 = add i32 %138, i32 %144; - [1000 * i32]* %146 = getelementptr &([1000 * i32]* %11)[i64 19]; - i32* %147 = getelementptr &([1000 * i32]* %146)[i64 0][i32 %8]; - i32* %148 = getelementptr &(i32* %13)[i64 19]; + i32* %136 = getelementptr &(i32* %14)[i64 20]; + i32 %137 = load i32* %136; + i32 %138 = mul i32 %135, i32 %137; + i32 %139 = add i32 %133, i32 %138; + i32* %140 = getelementptr &(i32* %12)[i64 21]; + i32 %141 = load i32* %140; + i32* %142 = getelementptr &(i32* %14)[i64 21]; + i32 %143 = load i32* %142; + i32 %144 = mul i32 %141, i32 %143; + i32 %145 = add i32 %139, i32 %144; + i32* %146 = getelementptr &(i32* %12)[i64 22]; + i32 %147 = load i32* %146; + i32* %148 = getelementptr &(i32* %14)[i64 22]; i32 %149 = load i32* %148; - i32 %150 = load i32* %147; - i32 %151 = mul i32 %149, i32 %150; - i32 %152 = add i32 %145, i32 %151; - [1000 * i32]* %153 = getelementptr &([1000 * i32]* %11)[i64 20]; - i32* %154 = getelementptr &([1000 * i32]* %153)[i64 0][i32 %8]; - i32* %155 = getelementptr &(i32* %13)[i64 20]; - i32 %156 = load i32* %155; - i32 %157 = load i32* %154; - i32 %158 = mul i32 %156, i32 %157; - i32 %159 = add i32 %152, i32 %158; - [1000 * i32]* %160 = getelementptr &([1000 * i32]* %11)[i64 21]; - i32* %161 = getelementptr &([1000 * i32]* %160)[i64 0][i32 %8]; - i32* %162 = getelementptr &(i32* %13)[i64 21]; - i32 %163 = load i32* %162; - i32 %164 = load i32* %161; - i32 %165 = mul i32 %163, i32 %164; - i32 %166 = add i32 %159, i32 %165; - [1000 * i32]* %167 = getelementptr &([1000 * i32]* %11)[i64 22]; - i32* %168 = getelementptr &([1000 * i32]* %167)[i64 0][i32 %8]; - i32* %169 = getelementptr &(i32* %13)[i64 22]; - i32 %170 = load i32* %169; - i32 %171 = load i32* %168; - i32 %172 = mul i32 %170, i32 %171; - i32 %173 = add i32 %166, i32 %172; - [1000 * i32]* %174 = getelementptr &([1000 * i32]* %11)[i64 23]; - i32* %175 = getelementptr &([1000 * i32]* %174)[i64 0][i32 %8]; - i32* %176 = getelementptr &(i32* %13)[i64 23]; - i32 %177 = load i32* %176; - i32 %178 = load i32* %175; - i32 %179 = mul i32 %177, i32 %178; - i32 %180 = add i32 %173, i32 %179; - i32* %181 = getelementptr &([1000 * i32]* %6)[i64 0][i32 %8]; - store i32* %181 with i32 %180; - i32 %182 = add i32 %8, i32 1; - i1 %183 = icmp slt i32 %182, i32 1000; - cbr i1 %183(prob = 0.999), ^while.body, ^b2; + i32 %150 = mul i32 %147, i32 %149; + i32 %151 = add i32 %145, i32 %150; + i32* %152 = getelementptr &(i32* %12)[i64 23]; + i32 %153 = load i32* %152; + i32* %154 = getelementptr &(i32* %14)[i64 23]; + i32 %155 = load i32* %154; + i32 %156 = mul i32 %153, i32 %155; + i32 %157 = add i32 %151, i32 %156; + i32* %158 = getelementptr &([1000 * i32]* %6)[i64 0][i32 %8]; + store i32* %158 with i32 %157; + i32 %159 = add i32 %8, i32 1; + i1 %160 = icmp slt i32 %159, i32 1000; + cbr i1 %160(prob = 0.999), ^while.body, ^b2; ^b2: - i32 %184 = add i32 %5, i32 1; - i1 %185 = icmp sgt i32 %1, i32 %184; - cbr i1 %185(prob = 0.984615), ^b1, ^b3; + i32 %161 = add i32 %5, i32 1; + i1 %162 = icmp sgt i32 %1, i32 %161; + cbr i1 %162(prob = 0.984615), ^b1, ^b3; ^b3: ret; } diff --git a/tests/SysY2022/performance/matmul2.riscv.s b/tests/SysY2022/performance/matmul2.riscv.s index cbbefb151..301b801f7 100644 --- a/tests/SysY2022/performance/matmul2.riscv.s +++ b/tests/SysY2022/performance/matmul2.riscv.s @@ -21,340 +21,355 @@ cmmc_parallel_body_payload_3: .globl main main: addi sp, sp, -88 -pcrel2037: +pcrel2255: auipc a0, %pcrel_hi(a) -pcrel2038: - auipc a1, %pcrel_hi(cmmc_parallel_body_2) +pcrel2256: + auipc a1, %pcrel_hi(cmmc_parallel_body_3) sd ra, 0(sp) sd s5, 8(sp) - addi s5, a0, %pcrel_lo(pcrel2037) + addi s5, a0, %pcrel_lo(pcrel2255) sd s0, 16(sp) -pcrel2039: +pcrel2257: auipc a0, %pcrel_hi(c) sd s7, 24(sp) -pcrel2040: +pcrel2258: auipc s7, %pcrel_hi(cmmc_parallel_body_payload_2) sd s8, 32(sp) - addi s8, s7, %pcrel_lo(pcrel2040) + addi s8, s7, %pcrel_lo(pcrel2258) sd s1, 40(sp) - addi s1, a0, %pcrel_lo(pcrel2039) + addi s1, a0, %pcrel_lo(pcrel2257) sd s6, 48(sp) -pcrel2041: - auipc a0, %pcrel_hi(cmmc_parallel_body_3) - addi s6, a1, %pcrel_lo(pcrel2038) +pcrel2259: + auipc a0, %pcrel_hi(cmmc_parallel_body_2) sd s9, 56(sp) + addi s6, a0, %pcrel_lo(pcrel2259) mv s9, zero + li a0, 125 sd s2, 64(sp) - addi s2, a0, %pcrel_lo(pcrel2041) + slli s0, a0, 5 + addi s2, a1, %pcrel_lo(pcrel2256) sd s3, 72(sp) - li a0, 125 + slli s3, s0, 1 sd s4, 80(sp) - slli s0, a0, 5 sh1add s4, s0, s0 - slli s3, s0, 1 -label1349: +label1566: li a0, 1000 - bge s9, a0, label1356 + bge s9, a0, label1571 mv a0, s5 jal getarray li a1, 1000 - bne a0, a1, label1354 + beq a0, a1, label1570 +label1599: + ld ra, 0(sp) + ld s5, 8(sp) + ld s0, 16(sp) + ld s7, 24(sp) + ld s8, 32(sp) + ld s1, 40(sp) + ld s6, 48(sp) + ld s9, 56(sp) + ld s2, 64(sp) + ld s3, 72(sp) + ld s4, 80(sp) + addi sp, sp, 88 + ret +label1570: addiw s9, s9, 1 add s5, s5, s0 - j label1349 -label1356: + j label1566 +label1571: li a0, 23 jal _sysy_starttime li a1, 1000 mv a0, zero -pcrel2042: +pcrel2260: auipc a3, %pcrel_hi(cmmc_parallel_body_0) - addi a2, a3, %pcrel_lo(pcrel2042) + addi a2, a3, %pcrel_lo(pcrel2260) jal cmmcParallelFor li a1, 1000 mv a0, zero -pcrel2043: +pcrel2261: auipc a3, %pcrel_hi(cmmc_parallel_body_1) - addi a2, a3, %pcrel_lo(pcrel2043) + addi a2, a3, %pcrel_lo(pcrel2261) jal cmmcParallelFor mv s9, zero mv s5, s1 mv a0, s1 mv a1, zero - lui a3, 524288 - addiw a2, a3, -1 - j label1360 + lui a4, 524288 + addiw a2, a4, -1 + j label1575 .p2align 2 -label1383: +label1598: addi a0, a0, 256 .p2align 2 -label1360: - ld a4, 0(a0) +label1575: + ld a3, 0(a0) addiw a1, a1, 64 - srai t1, a4, 32 - sext.w a3, a4 - min t0, a2, a3 - ld a3, 8(a0) - min a5, t0, t1 - srai t2, a3, 32 - sext.w a2, a3 - min t0, a5, a2 - ld a2, 16(a0) - min a4, t0, t2 - sext.w t1, a2 - srai t2, a2, 32 - min a5, a4, t1 - ld a4, 24(a0) - min a3, a5, t2 - srai t1, a4, 32 - sext.w t0, a4 - min a5, a3, t0 - ld a3, 32(a0) - min a2, a5, t1 - srai t0, a3, 32 - sext.w t2, a3 - min a5, a2, t2 - ld a2, 40(a0) - min a4, a5, t0 - srai t1, a2, 32 + srai t1, a3, 32 + sext.w t0, a3 + min a5, a2, t0 + ld a2, 8(a0) + min a4, a5, t1 sext.w a3, a2 + srai t1, a2, 32 min t0, a4, a3 - ld a3, 48(a0) + ld a3, 16(a0) min a5, t0, t1 srai t2, a3, 32 sext.w a4, a3 min t0, a5, a4 - ld a4, 56(a0) + ld a4, 24(a0) min a2, t0, t2 sext.w t1, a4 srai t0, a4, 32 min a5, a2, t1 - ld a2, 64(a0) + ld a2, 32(a0) min a3, a5, t0 sext.w t1, a2 srai t0, a2, 32 min a5, a3, t1 - ld a3, 72(a0) + ld a3, 40(a0) min a4, a5, t0 srai t1, a3, 32 sext.w a2, a3 min t0, a4, a2 - ld a2, 80(a0) + ld a2, 48(a0) min a5, t0, t1 sext.w a3, a2 srai t1, a2, 32 min t0, a5, a3 - ld a3, 88(a0) + ld a3, 56(a0) min a4, t0, t1 sext.w a2, a3 srai t1, a3, 32 - min a5, a4, a2 - ld a2, 96(a0) - min t0, a5, t1 + min t0, a4, a2 + ld a2, 64(a0) + min a5, t0, t1 sext.w a3, a2 srai t1, a2, 32 - min a4, t0, a3 - ld a3, 104(a0) - min a5, a4, t1 - sext.w a2, a3 - srai t1, a3, 32 - min t0, a5, a2 - ld a2, 112(a0) + min t0, a5, a3 + ld a3, 72(a0) min a4, t0, t1 - sext.w a3, a2 - srai t1, a2, 32 - min t0, a4, a3 - ld a3, 120(a0) - min a5, t0, t1 sext.w a2, a3 srai t1, a3, 32 - min t0, a5, a2 - ld a2, 128(a0) - min a4, t0, t1 - sext.w a3, a2 - srai t1, a2, 32 - min t0, a4, a3 - ld a3, 136(a0) + min t0, a4, a2 + ld a2, 80(a0) min a5, t0, t1 - sext.w a2, a3 - srai t1, a3, 32 - min a4, a5, a2 - ld a2, 144(a0) - min t0, a4, t1 + srai t2, a2, 32 sext.w a3, a2 - srai t1, a2, 32 - min a5, t0, a3 - ld a3, 152(a0) - min a4, a5, t1 - sext.w a2, a3 + min t0, a5, a3 + ld a3, 88(a0) + min a4, t0, t2 + sext.w t1, a3 + srai t0, a3, 32 + min a5, a4, t1 + ld a4, 96(a0) + min a2, a5, t0 + sext.w t1, a4 + srai t0, a4, 32 + min a5, a2, t1 + ld a2, 104(a0) + min a3, a5, t0 + sext.w t1, a2 + srai t0, a2, 32 + min a5, a3, t1 + ld a3, 112(a0) + min a4, a5, t0 srai t1, a3, 32 + sext.w a2, a3 min t0, a4, a2 - ld a2, 160(a0) + ld a2, 120(a0) min a5, t0, t1 sext.w a3, a2 srai t1, a2, 32 min t0, a5, a3 - ld a3, 168(a0) + ld a3, 128(a0) min a4, t0, t1 sext.w a2, a3 srai t1, a3, 32 min t0, a4, a2 - ld a2, 176(a0) + ld a2, 136(a0) + min a5, t0, t1 + srai t2, a2, 32 + sext.w a4, a2 + min t0, a5, a4 + ld a4, 144(a0) + min a3, t0, t2 + sext.w t1, a4 + srai t0, a4, 32 + min a5, a3, t1 + ld a3, 152(a0) + min a2, a5, t0 + sext.w t1, a3 + srai t0, a3, 32 + min a5, a2, t1 + ld a2, 160(a0) + min a4, a5, t0 + srai t1, a2, 32 + sext.w a3, a2 + min t0, a4, a3 + ld a3, 168(a0) min a5, t0, t1 + sext.w a2, a3 + srai t1, a3, 32 + min a4, a5, a2 + ld a2, 176(a0) + min t0, a4, t1 sext.w a3, a2 srai t1, a2, 32 - min a4, a5, a3 + min a5, t0, a3 ld a3, 184(a0) - min t0, a4, t1 + min a4, a5, t1 sext.w a2, a3 srai t1, a3, 32 - min a5, t0, a2 + min t0, a4, a2 ld a2, 192(a0) - min a4, a5, t1 - sext.w t0, a2 + min a5, t0, t1 + sext.w a4, a2 srai t1, a2, 32 - min a5, a4, t0 + min t0, a5, a4 ld a4, 200(a0) - min a3, a5, t1 - sext.w t0, a4 + min a3, t0, t1 + sext.w t2, a4 + srai t0, a4, 32 + min a5, a3, t2 + ld a3, 208(a0) + min a2, a5, t0 + sext.w t1, a3 + srai t0, a3, 32 + min a5, a2, t1 + ld a2, 216(a0) + min a4, a5, t0 + srai t2, a2, 32 + sext.w t1, a2 + min a5, a4, t1 + ld a4, 224(a0) + min a3, a5, t2 srai t1, a4, 32 + sext.w t0, a4 min a5, a3, t0 - ld a3, 208(a0) + ld a3, 232(a0) min a2, a5, t1 sext.w t0, a3 srai t1, a3, 32 - min a5, a2, t0 - ld a2, 216(a0) - min a4, a5, t1 - sext.w a3, a2 - srai t1, a2, 32 - min t0, a4, a3 - ld a3, 224(a0) - min a5, t0, t1 - sext.w a2, a3 - srai t1, a3, 32 - min a4, a5, a2 - ld a2, 232(a0) - min t0, a4, t1 + min a4, a2, t0 + ld a2, 240(a0) + min a5, a4, t1 sext.w a3, a2 srai t1, a2, 32 - min a5, t0, a3 - ld a3, 240(a0) - min a4, a5, t1 - srai t0, a3, 32 + min t0, a5, a3 + ld a3, 248(a0) + min a4, t0, t1 sext.w t2, a3 - min a2, a4, t2 - ld a4, 248(a0) - min a5, a2, t0 - sext.w t1, a4 - srai t0, a4, 32 - min a3, a5, t1 - li a4, 960 - min a2, a3, t0 - blt a1, a4, label1383 - ld a3, 256(a0) srai t0, a3, 32 - sext.w a1, a3 - min a5, a2, a1 - ld a1, 264(a0) - min a4, a5, t0 - sext.w a2, a1 - srai t0, a1, 32 - min a5, a4, a2 - ld a2, 272(a0) - min a3, a5, t0 - sext.w a1, a2 - srai t0, a2, 32 - min a5, a3, a1 - ld a1, 280(a0) - min a4, a5, t0 - sext.w a3, a1 - srai t0, a1, 32 - min a5, a4, a3 - ld a3, 288(a0) + min a5, a4, t2 + li a3, 960 min a2, a5, t0 - sext.w t1, a3 - srai a5, a3, 32 - min a4, a2, t1 - ld a2, 296(a0) - min a1, a4, a5 - sext.w t0, a2 - srai a5, a2, 32 - min a4, a1, t0 - ld a1, 304(a0) - min a3, a4, a5 + blt a1, a3, label1598 + ld a1, 256(a0) srai t0, a1, 32 - sext.w a2, a1 - min a5, a3, a2 - ld a2, 312(a0) - min a4, a5, t0 + sext.w a5, a1 + min a3, a2, a5 + ld a2, 264(a0) + min a4, a3, t0 sext.w a1, a2 srai t0, a2, 32 min a5, a4, a1 - ld a1, 320(a0) + ld a1, 272(a0) min a3, a5, t0 sext.w a2, a1 srai t0, a1, 32 min a5, a3, a2 - ld a2, 328(a0) + ld a2, 280(a0) min a4, a5, t0 - srai t1, a2, 32 - sext.w a3, a2 - min a5, a4, a3 - ld a3, 336(a0) - min a1, a5, t1 - sext.w t0, a3 - srai t1, a3, 32 - min a4, a1, t0 - ld a1, 344(a0) - min a2, a4, t1 - srai t0, a1, 32 - sext.w a5, a1 - min a3, a2, a5 - ld a2, 352(a0) - min a4, a3, t0 sext.w a1, a2 srai t0, a2, 32 min a5, a4, a1 - ld a1, 360(a0) + ld a1, 288(a0) min a3, a5, t0 sext.w a2, a1 srai t0, a1, 32 min a5, a3, a2 - ld a2, 368(a0) + ld a2, 296(a0) min a4, a5, t0 sext.w a1, a2 srai t0, a2, 32 - min a5, a4, a1 - ld a1, 376(a0) - min a3, a5, t0 + min a3, a4, a1 + ld a1, 304(a0) + min a5, a3, t0 sext.w a2, a1 srai t0, a1, 32 - min a5, a3, a2 - ld a2, 384(a0) - min a4, a5, t0 + min a4, a5, a2 + ld a2, 312(a0) + min a3, a4, t0 sext.w a1, a2 srai t0, a2, 32 - min a5, a4, a1 - ld a1, 392(a0) - min a3, a5, t0 + min a5, a3, a1 + ld a1, 320(a0) + min a4, a5, t0 + srai t1, a1, 32 + sext.w a3, a1 + min a5, a4, a3 + ld a3, 328(a0) + min a2, a5, t1 + sext.w t0, a3 + srai t1, a3, 32 + min a4, a2, t0 + ld a2, 336(a0) + min a1, a4, t1 + srai t0, a2, 32 + sext.w a5, a2 + min a4, a1, a5 + ld a1, 344(a0) + min a3, a4, t0 sext.w a2, a1 srai t0, a1, 32 min a5, a3, a2 - ld a2, 400(a0) + ld a2, 352(a0) min a4, a5, t0 - srai t1, a2, 32 sext.w a1, a2 + srai t0, a2, 32 min a5, a4, a1 + ld a1, 360(a0) + min a3, a5, t0 + sext.w t1, a1 + srai t0, a1, 32 + min a4, a3, t1 + ld a3, 368(a0) + min a2, a4, t0 + sext.w a5, a3 + srai t0, a3, 32 + min a4, a2, a5 + ld a2, 376(a0) + min a1, a4, t0 + sext.w a5, a2 + srai t0, a2, 32 + min a4, a1, a5 + ld a1, 384(a0) + min a3, a4, t0 + sext.w a5, a1 + srai t0, a1, 32 + min a4, a3, a5 + ld a3, 392(a0) + min a2, a4, t0 + sext.w a5, a3 + srai t0, a3, 32 + min a4, a2, a5 + ld a2, 400(a0) + min a1, a4, t0 + sext.w a5, a2 + srai t0, a2, 32 + min a4, a1, a5 ld a1, 408(a0) - min a3, a5, t1 -pcrel2044: + min a3, a4, t0 +pcrel2262: auipc s7, %pcrel_hi(cmmc_parallel_body_payload_2) - sd s5, %pcrel_lo(pcrel2044)(s7) + sd s5, %pcrel_lo(pcrel2262)(s7) srai a0, a1, 32 - sext.w t0, a1 + sext.w a5, a1 li a1, 1000 - min a4, a3, t0 + min a4, a3, a5 min a2, a4, a0 mv a0, zero sw a2, 8(s8) @@ -362,1174 +377,1276 @@ pcrel2044: jal cmmcParallelFor li a0, 1000 addiw s9, s9, 1 - bge s9, a0, label1365 + bge s9, a0, label1580 add s5, s5, s0 mv a1, zero - lui a3, 524288 + lui a4, 524288 mv a0, s5 - addiw a2, a3, -1 - j label1360 -label1381: - li a0, 92 - jal _sysy_stoptime - mv a0, s6 - jal putint - mv a0, zero -label1354: - ld ra, 0(sp) - ld s5, 8(sp) - ld s0, 16(sp) - ld s7, 24(sp) - ld s8, 32(sp) - ld s1, 40(sp) - ld s6, 48(sp) - ld s9, 56(sp) - ld s2, 64(sp) - ld s3, 72(sp) - ld s4, 80(sp) - addi sp, sp, 88 - ret -label1365: + addiw a2, a4, -1 + j label1575 +label1580: mv a2, s1 mv a0, zero mv a1, s1 mv a4, zero - j label1369 + j label1584 .p2align 2 -label1372: +label1596: addi a1, a1, 64 .p2align 2 -label1369: +label1584: mul t0, a4, s0 - li t5, 375 - li t6, 875 addiw a4, a4, 16 add a3, s1, t0 sh2add a5, a0, a3 - add t3, a3, s4 + add t2, a3, s0 lw t1, 0(a5) - add a5, a3, s0 + sh2add a5, a0, t2 subw t0, zero, t1 - sh2add t1, a0, a5 + add t2, a3, s3 sw t0, 0(a1) - lw t0, 0(t1) - add t1, a3, s3 - subw t2, zero, t0 - sh2add a5, a0, t1 - sh2add t1, a0, t3 - sw t2, 4(a1) - lw t2, 0(a5) - subw t0, zero, t2 - sw t0, 8(a1) - lw a5, 0(t1) - li t1, 125 - subw t0, zero, a5 - slli t2, t1, 7 + lw t1, 0(a5) + sh2add a5, a0, t2 + subw t0, zero, t1 + sw t0, 4(a1) + add t0, a3, s4 + lw t3, 0(a5) + sh2add t2, a0, t0 + subw t1, zero, t3 + li t0, 125 + sw t1, 8(a1) + lw a5, 0(t2) + slli t2, t0, 7 + subw t1, zero, a5 add a5, a3, t2 - sw t0, 12(a1) - sh2add t0, a0, a5 - lw t1, 0(t0) - li t0, 625 - subw t3, zero, t1 - slli t1, t0, 5 + sh2add t3, a0, a5 + sw t1, 12(a1) + lw t1, 0(t3) + li t3, 625 + subw t0, zero, t1 + slli t1, t3, 5 add a5, a3, t1 - sw t3, 16(a1) + sw t0, 16(a1) sh2add t4, a0, a5 + li a5, 375 lw t0, 0(t4) subw t3, zero, t0 - slli t0, t5, 6 + slli t0, a5, 6 add t4, a3, t0 sw t3, 20(a1) - sh2add a5, a0, t4 - lw t3, 0(a5) - slli a5, t6, 5 - subw t5, zero, t3 - add t4, a3, a5 - sw t5, 24(a1) sh2add t5, a0, t4 - slli t4, t2, 1 - lw t3, 0(t5) - subw t6, zero, t3 - add t3, a3, t4 - sh2add t5, a0, t3 - sw t6, 28(a1) - lw t2, 0(t5) + lw a5, 0(t5) + li t5, 875 + subw t3, zero, a5 + slli a5, t5, 5 + add t4, a3, a5 + sw t3, 24(a1) + sh2add t3, a0, t4 + lw t6, 0(t3) + slli t3, t2, 1 + subw t5, zero, t6 + add t6, a3, t3 + sh2add t4, a0, t6 + sw t5, 28(a1) li t5, 1125 - subw t4, zero, t2 + lw t2, 0(t4) + subw t3, zero, t2 slli t2, t5, 5 - sw t4, 32(a1) add t4, a3, t2 + sw t3, 32(a1) + slli t2, t1, 1 sh2add t3, a0, t4 lw t5, 0(t3) - slli t3, t1, 1 - subw t2, zero, t5 - add t5, a3, t3 - li t3, 1375 - sw t2, 36(a1) - sh2add t2, a0, t5 - lw t4, 0(t2) - slli t2, t3, 5 - subw t1, zero, t4 + subw t6, zero, t5 + add t5, a3, t2 + sh2add t4, a0, t5 + sw t6, 36(a1) + lw t3, 0(t4) + li t4, 1375 + subw t1, zero, t3 + slli t3, t4, 5 + add t2, a3, t3 sw t1, 40(a1) - add t1, a3, t2 - sh2add t5, a0, t1 - lw t3, 0(t5) - slli t5, t0, 1 - subw t4, zero, t3 - add t2, a3, t5 sh2add t1, a0, t2 - sw t4, 44(a1) - li t4, 1625 - lw t3, 0(t1) - slli t2, t4, 5 + slli t2, t0, 1 + lw t4, 0(t1) + add t1, a3, t2 + subw t3, zero, t4 + li t2, 1625 + sh2add t4, a0, t1 + slli t1, t2, 5 + sw t3, 44(a1) + lw t3, 0(t4) subw t0, zero, t3 - sw t0, 48(a1) - add t0, a3, t2 - sh2add t1, a0, t0 - lw t3, 0(t1) - slli t1, a5, 1 - subw t2, zero, t3 add t3, a3, t1 + sh2add t4, a0, t3 + sw t0, 48(a1) + slli t3, a5, 1 + lw t0, 0(t4) + add t1, a3, t3 + li t4, 1875 + subw t2, zero, t0 + slli t3, t4, 5 + sh2add t0, a0, t1 sw t2, 52(a1) - sh2add t2, a0, t3 - li t3, 1875 - lw t0, 0(t2) + lw a5, 0(t0) + add t0, a3, t3 + subw t2, zero, a5 + sh2add t1, a0, t0 + sw t2, 56(a1) + lw a5, 0(t1) + subw t2, zero, a5 + li a5, 992 + sw t2, 60(a1) + blt a4, a5, label1596 + li t0, 125 + lui t2, 17 + slli a5, t0, 9 + add t1, a3, a5 + sh2add a4, a0, t1 + lw t0, 0(a4) + addiw a4, t2, -1632 subw a5, zero, t0 - slli t0, t3, 5 - add t2, a3, t0 - sw a5, 56(a1) - li t0, 992 - sh2add t1, a0, t2 - lw t3, 0(t1) - subw a5, zero, t3 - sw a5, 60(a1) - blt a4, t0, label1372 - li t2, 125 - slli a5, t2, 9 + li t2, 1125 + add t0, a3, a4 + sh2add t1, a0, t0 + sw a5, 64(a1) + lw a5, 0(t1) + subw a4, zero, a5 + slli a5, t2, 6 + lui t2, 19 add t1, a3, a5 + sw a4, 68(a1) sh2add t0, a0, t1 lw a4, 0(t0) - lui t0, 17 subw a5, zero, a4 - addiw a4, t0, -1632 - add t2, a3, a4 - sw a5, 64(a1) - sh2add t1, a0, t2 - li t2, 1125 - lw t0, 0(t1) - slli a4, t2, 6 - subw a5, zero, t0 - lui t2, 21 + addiw a4, t2, -1824 + li t2, 625 add t1, a3, a4 + sw a5, 72(a1) sh2add t0, a0, t1 - sw a5, 68(a1) - lui t1, 19 lw a5, 0(t0) - addiw t0, t1, -1824 subw a4, zero, a5 - add a5, a3, t0 - sw a4, 72(a1) - sh2add a4, a0, a5 - lw t1, 0(a4) - li a4, 625 - subw t0, zero, t1 - slli a5, a4, 7 - add t1, a3, a5 - sw t0, 76(a1) - sh2add t0, a0, t1 - lw a4, 0(t0) + slli a5, t2, 7 + add t0, a3, a5 + sw a4, 76(a1) + sh2add t1, a0, t0 + lui t0, 21 + lw a4, 0(t1) subw a5, zero, a4 - addiw a4, t2, -2016 - li t2, 1375 + addiw a4, t0, -2016 add t1, a3, a4 sw a5, 80(a1) + sh2add a5, a0, t1 + lw t0, 0(a5) + li a5, 1375 + subw a4, zero, t0 + slli t2, a5, 6 + add t1, a3, t2 + sw a4, 84(a1) sh2add t0, a0, t1 - slli t1, t2, 6 lw a5, 0(t0) - add t0, a3, t1 + lui t0, 22 subw a4, zero, a5 - sh2add a5, a0, t0 - sw a4, 84(a1) - lw a4, 0(a5) - lui a5, 22 - subw t1, zero, a4 - addiw t0, a5, 1888 - sw t1, 88(a1) - add t1, a3, t0 - sh2add a4, a0, t1 + addiw t1, t0, 1888 + add a5, a3, t1 + sw a4, 88(a1) + sh2add a4, a0, a5 addiw a0, a0, 1 - lw a5, 0(a4) + lw t0, 0(a4) li a4, 1000 - subw a3, zero, a5 + subw a3, zero, t0 sw a3, 92(a1) - bge a0, a4, label1879 + bge a0, a4, label1588 add a2, a2, s0 mv a4, zero mv a1, a2 - j label1369 -label1879: - auipc a1, %pcrel_hi(cmmc_parallel_body_payload_3) + j label1584 +label1588: + auipc a0, %pcrel_hi(cmmc_parallel_body_payload_3) mv s4, zero mv s6, zero - addi s3, a1, %pcrel_lo(label1879) + addi s3, a0, %pcrel_lo(label1588) .p2align 2 -label1376: +label1589: auipc s5, %pcrel_hi(cmmc_parallel_body_payload_3) mv a0, zero li a1, 1000 - sw s6, %pcrel_lo(label1376)(s5) + sw s6, %pcrel_lo(label1589)(s5) sw s6, 4(s3) sd s1, 8(s3) mv a2, s2 jal cmmcParallelFor li a0, 1000 addiw s4, s4, 1 - lw s6, %pcrel_lo(label1376)(s5) - bge s4, a0, label1381 + lw s6, %pcrel_lo(label1589)(s5) + bge s4, a0, label1593 add s1, s1, s0 - j label1376 + j label1589 +label1593: + li a0, 92 + jal _sysy_stoptime + mv a0, s6 + jal putint + mv a0, zero + j label1599 .p2align 2 cmmc_parallel_body_0: - addi sp, sp, -24 - mv t5, a1 -pcrel772: - auipc a4, %pcrel_hi(b) - li a5, 125 - sd s1, 0(sp) - addi a2, a4, %pcrel_lo(pcrel772) - slli a3, a5, 5 - sd s0, 8(sp) - sh2add t2, a3, a3 - sh1add t0, a3, a3 - slli a5, a3, 1 - mul a1, a0, a3 - sd s2, 16(sp) - slli t3, t0, 1 - slli t1, a5, 1 - add t4, a2, a1 -pcrel773: + addi sp, sp, -32 + mv t4, a1 +pcrel1081: + auipc a2, %pcrel_hi(b) +pcrel1082: auipc a1, %pcrel_hi(a) - addi a4, a1, %pcrel_lo(pcrel773) - mv a1, t4 - mv t6, zero - j label5 + sd s0, 0(sp) + addi t3, a2, %pcrel_lo(pcrel1081) + addi t5, a1, %pcrel_lo(pcrel1082) + li a2, 125 + sd s1, 8(sp) + slli a3, a2, 5 + sd s2, 16(sp) + sh2add t1, a3, a3 + sh1add a5, a3, a3 + slli a4, a3, 1 + sd s3, 24(sp) + slli t2, a5, 1 + slli t0, a4, 1 + j label2 .p2align 2 -label9: +label8: li a6, 125 - lui s0, 63 + lui s2, 63 slli t6, a6, 11 + add a7, a1, t6 + sh2add s0, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, 1952 - lui s0, 64 - add a7, a2, a6 - sw t6, 256(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, 1856 - lui s0, 66 - add a7, a2, t6 - sw a6, 260(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 65 - sw t6, 264(a1) - addiw t6, a6, 1760 + lw a6, 0(s0) + addiw t6, s2, 1952 + sh2add s1, a0, a7 + lui s2, 64 + add a7, a1, t6 + sw a6, 0(s1) + sh2add s0, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, 1664 - lui s0, 67 - add a7, a2, a6 - sw t6, 268(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, 1568 + lw a6, 0(s0) + addiw t6, s2, 1856 + sh2add s1, a0, a7 + lui s2, 65 + add a7, a1, t6 + sw a6, 0(s1) + sh2add s0, a0, a7 add a7, a2, t6 - sw a6, 272(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 68 - addiw a7, a6, 1472 - sw t6, 276(a1) - add t6, a2, a7 - sh2add a6, a0, t6 - lui t6, 69 - lw a7, 0(a6) - addiw a6, t6, 1376 - sw a7, 280(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - li a7, 1125 - lw a6, 0(t6) - slli t6, a7, 8 - add s0, a2, t6 - sw a6, 284(a1) + lw a6, 0(s0) + addiw t6, s2, 1760 + sh2add s1, a0, a7 + lui s2, 66 + add a7, a1, t6 + sw a6, 0(s1) + sh2add s0, a0, a7 + add s1, a2, t6 + lw a6, 0(s0) + addiw t6, s2, 1664 + sh2add a7, a0, s1 + lui s2, 67 + add s0, a1, t6 + add s1, a2, t6 + sw a6, 0(a7) + addiw t6, s2, 1568 sh2add a7, a0, s0 + lui s2, 68 + lw a6, 0(a7) + sh2add a7, a0, s1 + add s1, a2, t6 + sw a6, 0(a7) + add a7, a1, t6 + addiw t6, s2, 1472 + sh2add s0, a0, a7 + li s2, 1125 + sh2add a7, a0, s1 + lw a6, 0(s0) + add s1, a2, t6 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s0, a0, a7 + sh2add a7, a0, s1 + lw a6, 0(s0) + lui s0, 69 + addiw t6, s0, 1376 + sw a6, 0(a7) + add s0, a2, t6 + add s1, a1, t6 + slli t6, s2, 8 + sh2add a7, a0, s1 + lui s2, 73 lw a6, 0(a7) + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 + add a7, a2, t6 + lw a6, 0(s1) + sh2add s0, a0, a7 lui a7, 71 addiw t6, a7, 1184 - sw a6, 288(a1) + sw a6, 0(s0) + add s1, a1, t6 add s0, a2, t6 - lui t6, 72 - sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, t6, 1088 - sw a7, 292(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - lui a7, 73 - lw a6, 0(t6) - addiw t6, a7, 992 - add s0, a2, t6 - sw a6, 296(a1) - lui t6, 74 - sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, t6, 896 - sw a7, 300(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - lui a7, 75 - lw a6, 0(t6) - addiw t6, a7, 800 - add s0, a2, t6 - sw a6, 304(a1) + sh2add a7, a0, s1 + lui s1, 72 + lw a6, 0(a7) + addiw t6, s1, 1088 + sh2add a7, a0, s0 + add s1, a2, t6 + sw a6, 0(a7) + add a7, a1, t6 + addiw t6, s2, 992 + sh2add s0, a0, a7 + lui s2, 75 + sh2add a7, a0, s1 + lw a6, 0(s0) + add s1, a2, t6 + add s0, a1, t6 + sw a6, 0(a7) sh2add a7, a0, s0 + lui s0, 74 lw a6, 0(a7) + addiw t6, s0, 896 + sh2add a7, a0, s1 + add s0, a2, t6 + add s1, a1, t6 + sw a6, 0(a7) + addiw t6, s2, 800 + sh2add a7, a0, s1 + lui s2, 77 + lw a6, 0(a7) + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 + add a7, a2, t6 + lw a6, 0(s1) + sh2add s0, a0, a7 lui a7, 76 addiw t6, a7, 704 - sw a6, 308(a1) + sw a6, 0(s0) + add s1, a1, t6 add s0, a2, t6 - lui t6, 77 - sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, t6, 608 - sw a7, 312(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - li a7, 625 - lw a6, 0(t6) - slli t6, a7, 9 - add s0, a2, t6 - sw a6, 316(a1) - lui t6, 79 - sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, t6, 416 - sw a7, 320(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - lui a7, 80 - lw a6, 0(t6) - addiw t6, a7, 320 + sh2add a7, a0, s1 + addiw t6, s2, 608 + lw a6, 0(a7) + li s2, 625 + add s1, a1, t6 + sh2add a7, a0, s0 add s0, a2, t6 - sw a6, 324(a1) + sw a6, 0(a7) + slli t6, s2, 9 + sh2add a7, a0, s1 + lui s2, 79 + add s1, a1, t6 + lw a6, 0(a7) sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, 416 + sh2add a7, a0, s1 + lui s2, 80 + add s1, a1, t6 lw a6, 0(a7) - lui a7, 81 - addiw t6, a7, 224 - sw a6, 328(a1) + sh2add a7, a0, s0 add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, 320 + sh2add a7, a0, s1 + lui s2, 81 + add s1, a1, t6 + lw a6, 0(a7) sh2add a7, a0, s0 - lui s0, 82 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, 224 + sh2add a7, a0, s1 + lui s2, 83 lw a6, 0(a7) - addiw t6, s0, 128 - lui s0, 83 - add a7, a2, t6 - sw a6, 332(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, 32 - lui s0, 84 - add a7, a2, a6 - sw t6, 336(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -64 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sw a6, 340(a1) - sh2add a6, a0, a7 - lui a7, 85 - lw t6, 0(a6) - addiw a6, a7, -160 - sw t6, 344(a1) - add t6, a2, a6 - li a6, 1375 - sh2add s0, a0, t6 - slli t6, a6, 8 - lw a7, 0(s0) + lw a6, 0(s1) + sh2add s0, a0, a7 + lui a7, 82 + addiw t6, a7, 128 + sw a6, 0(s0) + add s1, a1, t6 add s0, a2, t6 - sw a7, 348(a1) - sh2add a7, a0, s0 - lui s0, 87 + sh2add a7, a0, s1 + addiw t6, s2, 32 lw a6, 0(a7) - addiw t6, s0, -352 - lui s0, 88 - add a7, a2, t6 - sw a6, 352(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, -448 - lui s0, 89 - add a7, a2, a6 - sw t6, 356(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -544 - lui s0, 91 - add a7, a2, t6 - sw a6, 360(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 90 - sw t6, 364(a1) - addiw t6, a6, -640 - add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, -736 - lui s0, 92 - add a7, a2, a6 - sw t6, 368(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -832 - li s0, 375 - add a7, a2, t6 - sw a6, 372(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 93 - sw t6, 376(a1) - addiw t6, a6, -928 + lui s2, 85 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - slli a6, s0, 10 - lui s0, 95 - add a7, a2, a6 - sw t6, 380(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -1120 - lui s0, 96 + lw a6, 0(s1) + sh2add s0, a0, a7 + lui a7, 84 + addiw t6, a7, -64 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -160 + lw a6, 0(a7) + lui s2, 87 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sw a6, 384(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, -1216 - lui s0, 97 - add a7, a2, a6 - sw t6, 388(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -1312 + lw a6, 0(s1) + sh2add s0, a0, a7 + li a7, 1375 + slli t6, a7, 8 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -352 + lw a6, 0(a7) + lui s2, 89 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sw a6, 392(a1) - sh2add a6, a0, a7 - lui a7, 98 - lw t6, 0(a6) - addiw a6, a7, -1408 - add s0, a2, a6 - sw t6, 396(a1) - lui a6, 99 - sh2add t6, a0, s0 - lw a7, 0(t6) - addiw t6, a6, -1504 - sw a7, 400(a1) + lw a6, 0(s1) + sh2add s0, a0, a7 + lui a7, 88 + addiw t6, a7, -448 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -544 + lw a6, 0(a7) + lui s2, 90 + add s1, a1, t6 + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, -640 + sh2add a7, a0, s1 + lui s2, 91 + add s1, a1, t6 + lw a6, 0(a7) + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, -736 + sh2add a7, a0, s1 + lui s2, 93 + lw a6, 0(a7) + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 100 - sw t6, 404(a1) - addiw t6, a6, -1600 + lw a6, 0(s1) + sh2add s0, a0, a7 + lui a7, 92 + addiw t6, a7, -832 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -928 + lw a6, 0(a7) + lui s2, 95 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lui a7, 101 - lw t6, 0(a6) - addiw a6, a7, -1696 - sw t6, 408(a1) - add t6, a2, a6 - sh2add a7, a0, t6 + lw a6, 0(s1) + sh2add s0, a0, a7 + li a7, 375 + slli t6, a7, 10 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -1120 + lw a6, 0(a7) + lui s2, 96 + add s1, a1, t6 + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, -1216 + sh2add a7, a0, s1 + lui s2, 99 + add s1, a1, t6 + lw a6, 0(a7) + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + sh2add a7, a0, s1 + lui s1, 97 + lw a6, 0(a7) + addiw t6, s1, -1312 + sh2add a7, a0, s0 + add s1, a2, t6 + add s0, a1, t6 + sw a6, 0(a7) + sh2add a7, a0, s0 + lui s0, 98 + lw a6, 0(a7) + addiw t6, s0, -1408 + sh2add a7, a0, s1 + add s0, a2, t6 + add s1, a1, t6 + sw a6, 0(a7) + addiw t6, s2, -1504 + sh2add a7, a0, s1 + lui s2, 100 + add s1, a1, t6 + lw a6, 0(a7) + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, -1600 + sh2add a7, a0, s1 + add s1, a1, t6 + lw a6, 0(a7) + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + sh2add a7, a0, s1 + lui s1, 101 + lw a6, 0(a7) + addiw t6, s1, -1696 + sh2add a7, a0, s0 + add s1, a2, t6 + add s0, a1, t6 + sw a6, 0(a7) + sh2add a1, a0, s1 + sh2add a7, a0, s0 addiw a0, a0, 1 lw a6, 0(a7) - sw a6, 412(a1) - ble t5, a0, label11 - add t4, t4, a3 + sw a6, 0(a1) + ble t4, a0, label9 +.p2align 2 +label2: + mv a1, t5 mv t6, zero - mv a1, t4 .p2align 2 -label5: - mul a6, t6, a3 - li s2, 875 +label4: + mul a7, t6, a3 addiw t6, t6, 64 - add a2, a4, a6 - sh2add a7, a0, a2 - add a6, a2, a3 - sh2add s0, a0, a6 - add a6, a2, a5 - lw s1, 0(a7) - sw s1, 0(a1) - lw a7, 0(s0) - sw a7, 4(a1) - sh2add a7, a0, a6 - add a6, a2, t0 - lw s0, 0(a7) - sh2add a7, a0, a6 - sw s0, 8(a1) + add a2, t3, a7 + sh2add a6, a0, a2 + add s1, a2, a3 + sh2add a7, a0, a1 lw s0, 0(a7) - add a7, a2, t1 - sh2add a6, a0, a7 - sw s0, 12(a1) - add a7, a2, t2 - lw s0, 0(a6) - sh2add s1, a0, a7 - sw s0, 16(a1) - add s0, a2, t3 - lw a6, 0(s1) - sh2add a7, a0, s0 - li s1, 875 - sw a6, 20(a1) - lw a6, 0(a7) - slli a7, s1, 5 - li s1, 1125 - sw a6, 24(a1) - add a6, a2, a7 + sh2add a7, a0, s1 + add s1, a2, a4 + sw s0, 0(a6) + add a6, a1, a3 sh2add s0, a0, a6 - li a6, 125 + sh2add a6, a0, s1 + lw s2, 0(s0) + sw s2, 0(a7) + add s2, a1, a4 + sh2add s0, a0, s2 + add s2, a2, a5 lw a7, 0(s0) - sw a7, 28(a1) - slli a7, a6, 8 + add s0, a1, a5 + sh2add s1, a0, s0 + sw a7, 0(a6) + add s0, a2, t0 + lw a6, 0(s1) + sh2add a7, a0, s2 + add s2, a2, t1 + sw a6, 0(a7) + add a7, a1, t0 + sh2add a6, a0, s0 + sh2add s1, a0, a7 + sh2add a7, a0, s2 + lw s0, 0(s1) + li s2, 875 + sw s0, 0(a6) + add s0, a2, t2 + add a6, a1, t1 + sh2add s3, a0, a6 + sh2add a6, a0, s0 + lw s1, 0(s3) + sw s1, 0(a7) + add s1, a1, t2 + sh2add a7, a0, s1 + lw s0, 0(a7) + slli a7, s2, 5 + add s1, a2, a7 + sw s0, 0(a6) + add s0, a1, a7 + sh2add a6, a0, s1 + sh2add s2, a0, s0 + li s1, 125 + lw a7, 0(s2) + sw a7, 0(a6) + slli a7, s1, 8 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - sw a7, 32(a1) + li s1, 1125 + lw a7, 0(s2) + sw a7, 0(a6) slli a7, s1, 5 - add a6, a2, a7 - sh2add s0, a0, a6 - li a6, 625 - lw a7, 0(s0) - sw a7, 36(a1) - slli a7, a6, 6 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - li a6, 1375 - slli s1, a6, 5 - sw a7, 40(a1) - add a7, a2, s1 - li s1, 375 - sh2add a6, a0, a7 - lw s0, 0(a6) - slli a6, s1, 7 - li s1, 1625 - add a7, a2, a6 - sw s0, 44(a1) - sh2add s0, a0, a7 - lw a6, 0(s0) - sw a6, 48(a1) - slli a6, s1, 5 - add a7, a2, a6 - sh2add s0, a0, a7 - slli a7, s2, 6 - lw a6, 0(s0) - li s2, 125 + lw a7, 0(s2) + li s0, 625 + sw a7, 0(a6) + slli a7, s0, 6 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + li s0, 1375 + lw a7, 0(s1) + sw a7, 0(a6) + slli a7, s0, 5 + add s0, a1, a7 add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + li s0, 375 + lw a7, 0(s2) + sw a7, 0(a6) + slli a7, s0, 7 + add s1, a1, a7 + add s2, a2, a7 sh2add s0, a0, s1 - sw a6, 52(a1) - li s1, 1875 - lw a6, 0(s0) + sh2add a6, a0, s2 + li s1, 1625 + lw a7, 0(s0) + sw a7, 0(a6) slli a7, s1, 5 - sw a6, 56(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - slli a6, s2, 9 + add s2, a1, a7 + add s0, a2, a7 + sh2add s1, a0, s2 + sh2add a6, a0, s0 + lw a7, 0(s1) + li s0, 875 + sw a7, 0(a6) + slli a7, s0, 6 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + li s0, 1875 + lw a7, 0(s1) + sw a7, 0(a6) + slli a7, s0, 5 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lw a7, 0(s0) - lui s2, 17 - add s1, a2, a6 + li s2, 125 + sw a7, 0(a6) + slli a7, s2, 9 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + lui s1, 17 + sw a7, 0(a6) + addiw a7, s1, -1632 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + li s0, 1125 + lw a7, 0(s1) + sw a7, 0(a6) + slli a7, s0, 6 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lui s0, 19 + lw a7, 0(s2) + sw a7, 0(a6) + addiw a7, s0, -1824 + add s1, a1, a7 + add s2, a2, a7 sh2add s0, a0, s1 - sw a7, 60(a1) - addiw s1, s2, -1632 + sh2add a6, a0, s2 + li s1, 625 lw a7, 0(s0) - lui s2, 26 - add a6, a2, s1 - sw a7, 64(a1) - sh2add a7, a0, a6 - lw s0, 0(a7) - li a7, 1125 - slli a6, a7, 6 - sw s0, 68(a1) - add s0, a2, a6 - sh2add a7, a0, s0 - lw a6, 0(a7) - lui a7, 19 - sw a6, 72(a1) - addiw a6, a7, -1824 - add s0, a2, a6 - sh2add a7, a0, s0 - lw a6, 0(a7) - li a7, 625 - slli s1, a7, 7 - sw a6, 76(a1) - add a6, a2, s1 - sh2add a7, a0, a6 - lw s0, 0(a7) - lui a7, 21 - addiw a6, a7, -2016 - sw s0, 80(a1) - add s0, a2, a6 - li a6, 1375 + sw a7, 0(a6) + slli a7, s1, 7 + add s0, a1, a7 + add s2, a2, a7 sh2add s1, a0, s0 + sh2add a6, a0, s2 + lui s0, 21 lw a7, 0(s1) - li s1, 375 - sw a7, 84(a1) - slli a7, a6, 6 - add s0, a2, a7 - sh2add a6, a0, s0 - lw a7, 0(a6) - lui a6, 22 - sw a7, 88(a1) - addiw a7, a6, 1888 + sw a7, 0(a6) + addiw a7, s0, -2016 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + li s2, 1375 + lw a7, 0(s0) + sw a7, 0(a6) + slli a7, s2, 6 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + lui s1, 22 + sw a7, 0(a6) + addiw a7, s1, 1888 + add s2, a1, a7 add s0, a2, a7 + sh2add s1, a0, s2 sh2add a6, a0, s0 - lw a7, 0(a6) - slli a6, s1, 8 - lui s1, 24 - add s0, a2, a6 - sw a7, 92(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - sw a6, 96(a1) - addiw a6, s1, 1696 - add a7, a2, a6 - sh2add s0, a0, a7 - li a7, 1625 - lw a6, 0(s0) - sw a6, 100(a1) - slli a6, a7, 6 - add s0, a2, a6 - addiw a6, s2, 1504 + lw a7, 0(s1) + li s0, 375 + sw a7, 0(a6) + slli a7, s0, 8 + add s0, a1, a7 + add s2, a2, a7 sh2add s1, a0, s0 - li s2, 125 - add s0, a2, a6 + sh2add a6, a0, s2 + lui s0, 24 lw a7, 0(s1) - sw a7, 104(a1) - sh2add a7, a0, s0 - li s0, 875 - lw a6, 0(a7) - slli s1, s0, 7 - add a7, a2, s1 - lui s1, 28 - sw a6, 108(a1) - sh2add a6, a0, a7 - lw s0, 0(a6) - addiw a6, s1, 1312 - li s1, 1875 - add a7, a2, a6 - sw s0, 112(a1) - sh2add s0, a0, a7 - slli a7, s1, 6 - lw a6, 0(s0) + sw a7, 0(a6) + addiw a7, s0, 1696 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + li s0, 1625 + lw a7, 0(s2) + sw a7, 0(a6) + slli a7, s0, 6 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lui s0, 26 + lw a7, 0(s2) + sw a7, 0(a6) + addiw a7, s0, 1504 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + li s1, 875 + sw a7, 0(a6) + slli a7, s1, 7 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + lui s0, 28 + lw a7, 0(s1) + sw a7, 0(a6) + addiw a7, s0, 1312 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + li s0, 1875 + lw a7, 0(s1) + sw a7, 0(a6) + slli a7, s0, 6 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 30 + lw a7, 0(s0) + sw a7, 0(a6) + addiw a7, s1, 1120 + add s2, a1, a7 add s0, a2, a7 - sw a6, 116(a1) + sh2add s1, a0, s2 sh2add a6, a0, s0 - addiw s0, s1, 1120 - lw a7, 0(a6) - add a6, a2, s0 - sw a7, 120(a1) - sh2add a7, a0, a6 - lw s1, 0(a7) - slli a7, s2, 10 - li s2, 1125 - add a6, a2, a7 - sw s1, 124(a1) - sh2add s0, a0, a6 + lw a7, 0(s1) + li s0, 125 + sw a7, 0(a6) + slli a7, s0, 10 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 32 lw a7, 0(s0) - addiw a6, s1, 928 + sw a7, 0(a6) + addiw a7, s1, 928 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 33 - sw a7, 128(a1) - add a7, a2, a6 - sh2add s0, a0, a7 - lw a6, 0(s0) - sw a6, 132(a1) - addiw a6, s1, 832 - add a7, a2, a6 - sh2add s0, a0, a7 - lui a7, 34 - lw a6, 0(s0) - sw a6, 136(a1) - addiw a6, a7, 736 - add s1, a2, a6 - slli a6, s2, 7 + lw a7, 0(s0) + sw a7, 0(a6) + addiw a7, s1, 832 + add s1, a1, a7 + add s2, a2, a7 sh2add s0, a0, s1 - lui s2, 60 - add s1, a2, a6 + sh2add a6, a0, s2 + lui s1, 34 lw a7, 0(s0) + sw a7, 0(a6) + addiw a7, s1, 736 + add s1, a1, a7 + add s2, a2, a7 sh2add s0, a0, s1 - lui s1, 36 - sw a7, 140(a1) - addiw a6, s1, 544 + sh2add a6, a0, s2 + li s1, 1125 lw a7, 0(s0) - lui s1, 37 - sw a7, 144(a1) - add a7, a2, a6 - sh2add s0, a0, a7 - lw a6, 0(s0) - sw a6, 148(a1) - addiw a6, s1, 448 + sw a7, 0(a6) + slli a7, s1, 7 + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lui s1, 36 + lw a7, 0(s2) + sw a7, 0(a6) + addiw a7, s1, 544 + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lw a7, 0(s2) + lui s0, 37 + sw a7, 0(a6) + addiw a7, s0, 448 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 38 - add a7, a2, a6 - sh2add s0, a0, a7 + lw a7, 0(s0) + sw a7, 0(a6) addiw a7, s1, 352 - lw a6, 0(s0) - li s1, 625 - sw a6, 152(a1) - add a6, a2, a7 - sh2add s0, a0, a6 + add s2, a1, a7 + add s0, a2, a7 + sh2add s1, a0, s2 + sh2add a6, a0, s0 + lw a7, 0(s1) + li s0, 625 + sw a7, 0(a6) + slli a7, s0, 8 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 lw a7, 0(s0) - sw a7, 156(a1) - slli a7, s1, 8 lui s1, 40 - add a6, a2, a7 - sh2add s0, a0, a6 - lw a7, 0(s0) - sw a7, 160(a1) + sw a7, 0(a6) addiw a7, s1, 160 - lui s1, 41 - add a6, a2, a7 - sh2add s0, a0, a6 - addiw a6, s1, 64 + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lw a7, 0(s2) + lui s0, 41 + sw a7, 0(a6) + addiw a7, s0, 64 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + lw a7, 0(s1) + lui s2, 42 + sw a7, 0(a6) + addiw a7, s2, -32 + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lw a7, 0(s2) + li s0, 1375 + sw a7, 0(a6) + slli a7, s0, 7 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lw a7, 0(s0) - lui s1, 42 - add s0, a2, a6 - sw a7, 164(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - sw a6, 168(a1) - addiw a6, s1, -32 - li s1, 1375 - add a7, a2, a6 - sh2add s0, a0, a7 - lw a6, 0(s0) - sw a6, 172(a1) - slli a6, s1, 7 - lui s1, 44 - add a7, a2, a6 - sh2add s0, a0, a7 - addiw a7, s1, -224 - lw a6, 0(s0) - lui s1, 46 - sw a6, 176(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - lui a6, 45 + lui s2, 44 + sw a7, 0(a6) + addiw a7, s2, -224 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 lw a7, 0(s0) - sw a7, 180(a1) - addiw a7, a6, -320 + lui s1, 45 + sw a7, 0(a6) + addiw a7, s1, -320 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + lw a7, 0(s1) + lui s2, 46 + sw a7, 0(a6) + addiw a7, s2, -416 + add s2, a1, a7 add s0, a2, a7 + sh2add s1, a0, s2 sh2add a6, a0, s0 - lw a7, 0(a6) - sw a7, 184(a1) - addiw a7, s1, -416 - lui s1, 48 - add a6, a2, a7 - sh2add s0, a0, a6 - li a6, 375 + lw a7, 0(s1) + li s0, 375 + sw a7, 0(a6) + slli a7, s0, 9 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lw a7, 0(s0) - sw a7, 188(a1) - slli a7, a6, 9 + lui s2, 48 + sw a7, 0(a6) + addiw a7, s2, -608 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lw a7, 0(s2) + lui s1, 49 + sw a7, 0(a6) + addiw a7, s1, -704 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, s1, -608 lui s1, 50 - add s0, a2, a6 - sw a7, 192(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - lui a7, 49 - sw a6, 196(a1) - addiw a6, a7, -704 - add s0, a2, a6 - sh2add a7, a0, s0 - lw a6, 0(a7) + lw a7, 0(s2) + sw a7, 0(a6) addiw a7, s1, -800 - li s1, 1625 - sw a6, 200(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - slli a6, s1, 7 - lw a7, 0(s0) + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lw a7, 0(s2) + li s0, 1625 + sw a7, 0(a6) + slli a7, s0, 7 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 52 - add s0, a2, a6 - sw a7, 204(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - sw a6, 208(a1) - addiw a6, s1, -992 - lui s1, 53 - add a7, a2, a6 - sh2add s0, a0, a7 - addiw a7, s1, -1088 - lw a6, 0(s0) - lui s1, 54 - sw a6, 212(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - addiw a6, s1, -1184 lw a7, 0(s0) - li s1, 875 - sw a7, 216(a1) - add a7, a2, a6 - sh2add s0, a0, a7 - slli a7, s1, 8 - lw a6, 0(s0) - lui s1, 57 + sw a7, 0(a6) + addiw a7, s1, -992 + add s2, a1, a7 add s0, a2, a7 - sw a6, 220(a1) + sh2add s1, a0, s2 sh2add a6, a0, s0 - lw a7, 0(a6) - lui a6, 56 - sw a7, 224(a1) - addiw a7, a6, -1376 + lw a7, 0(s1) + lui s0, 53 + sw a7, 0(a6) + addiw a7, s0, -1088 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lui s2, 54 + lw a7, 0(s0) + sw a7, 0(a6) + addiw a7, s2, -1184 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lw a7, 0(s2) + li s1, 875 + sw a7, 0(a6) + slli a7, s1, 8 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - sw a7, 228(a1) + lw a7, 0(s2) + lui s0, 56 + sw a7, 0(a6) + addiw a7, s0, -1376 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lw a7, 0(s2) + lui s1, 57 + sw a7, 0(a6) addiw a7, s1, -1472 - li s1, 1875 - add a6, a2, a7 - sh2add s0, a0, a6 - lui a6, 58 - lw a7, 0(s0) - sw a7, 232(a1) - addiw a7, a6, -1568 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - slli a6, s1, 7 - sw a7, 236(a1) - add a7, a2, a6 - sh2add s0, a0, a7 - addiw a7, s2, -1760 - lw a6, 0(s0) - lui s2, 61 - add s0, a2, a7 - addiw a7, s2, -1856 + lw a7, 0(s2) + lui s0, 58 + sw a7, 0(a6) + addiw a7, s0, -1568 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + li s1, 1875 + sw a7, 0(a6) + slli a7, s1, 7 + add s0, a1, a7 + add s2, a2, a7 sh2add s1, a0, s0 - sw a6, 240(a1) - lw a6, 0(s1) + sh2add a6, a0, s2 + lui s0, 60 + lw a7, 0(s1) + sw a7, 0(a6) + addiw a7, s0, -1760 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + lui s1, 61 + sw a7, 0(a6) + addiw a7, s1, -1856 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 62 - sw a6, 244(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - addiw a6, s1, -1952 lw a7, 0(s0) - add s0, a2, a6 - sw a7, 248(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - li a7, 960 - sw a6, 252(a1) - bge t6, a7, label9 - addi a1, a1, 256 - j label5 -label11: - ld s1, 0(sp) - ld s0, 8(sp) + sw a7, 0(a6) + addiw a7, s1, -1952 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + lw a7, 0(s1) + sw a7, 0(a6) + li a6, 960 + bge t6, a6, label8 + li a6, 125 + slli a2, a6, 11 + add a1, a1, a2 + j label4 +label9: + ld s0, 0(sp) + ld s1, 8(sp) ld s2, 16(sp) - addi sp, sp, 24 + ld s3, 24(sp) + addi sp, sp, 32 ret .p2align 2 cmmc_parallel_body_1: - addi sp, sp, -80 - mv t4, a1 -pcrel1086: + addi sp, sp, -56 + mv t1, a1 +pcrel1303: auipc a4, %pcrel_hi(c) li a5, 125 - mv t6, a0 - addi a3, a4, %pcrel_lo(pcrel1086) - slli a2, a5, 5 + mv t4, a0 + addi a2, a4, %pcrel_lo(pcrel1303) + slli a3, a5, 5 sd s0, 0(sp) - sh2add t1, a2, a2 - sh1add a5, a2, a2 - slli a4, a2, 1 - mul a1, a0, a2 + li a5, 1000 + mul a1, a0, a3 sd s5, 8(sp) - slli t0, a4, 1 -pcrel1087: - auipc a0, %pcrel_hi(a) - add t3, a3, a1 - addi t5, a0, %pcrel_lo(pcrel1087) +pcrel1304: + auipc a0, %pcrel_hi(b) + add a4, a2, a1 sd s1, 16(sp) -pcrel1088: - auipc a1, %pcrel_hi(b) + addi t2, a0, %pcrel_lo(pcrel1304) +pcrel1305: + auipc a1, %pcrel_hi(a) sd s6, 24(sp) - addi a3, a1, %pcrel_lo(pcrel1088) - sd s3, 32(sp) - sd s4, 40(sp) + addi t3, a1, %pcrel_lo(pcrel1305) + sd s4, 32(sp) + li a1, 992 + sd s3, 40(sp) sd s2, 48(sp) - sd s7, 56(sp) - sd s9, 64(sp) - sd s8, 72(sp) - mul a1, t6, a2 - mv a6, t3 - mv a0, zero - add t2, t5, a1 - mv a1, t2 - mv s0, zero - mv s1, zero - j label781 + mul a0, t4, a3 + mv a2, t2 + mv t5, zero + add t0, t3, a0 + mv a0, t0 + mv a6, zero + mv a7, zero + j label1090 .p2align 2 -label786: - li s0, 125 - lui s6, 17 - slli s4, s0, 9 - lw s0, 64(a1) - add s2, a7, s4 - sh2add s3, a0, s2 - lw s4, 0(s3) - mulw s5, s0, s4 - addiw s4, s6, -1632 - addw s2, s1, s5 - li s6, 1125 - add s0, a7, s4 - lw s1, 68(a1) - sh2add s3, a0, s0 - lw s4, 0(s3) - mulw s5, s1, s4 - slli s4, s6, 6 - addw s0, s2, s5 - lui s6, 19 - add s1, a7, s4 - lw s2, 72(a1) - sh2add s3, a0, s1 - lw s4, 0(s3) - mulw s5, s2, s4 - lw s2, 76(a1) - addiw s4, s6, -1824 - addw s1, s0, s5 - li s6, 625 - add s0, a7, s4 - sh2add s3, a0, s0 - lw s5, 0(s3) +label1095: + lw a6, 64(a0) + lw s3, 64(t6) + lw s1, 68(a0) + mulw s2, a6, s3 + lw s3, 68(t6) + addw s0, a7, s2 + lw s2, 72(a0) + mulw a7, s1, s3 + lw s3, 72(t6) + addw a6, s0, a7 + lw s0, 76(a0) + mulw s1, s2, s3 + lw s2, 76(t6) + addw a7, a6, s1 + lw s1, 80(a0) + lw s3, 80(t6) + mulw s4, s0, s2 + lw s0, 84(a0) + addw a6, a7, s4 + mulw s2, s1, s3 + lw s3, 84(t6) + addw a7, a6, s2 + lw s1, 88(a0) + mulw s2, s0, s3 + lw s3, 88(t6) + addw a6, a7, s2 + lw s0, 92(a0) + mulw s2, s1, s3 + lw s1, 92(t6) + addw a7, a6, s2 + sh2add t6, t5, a4 + addiw t5, t5, 1 + mulw a6, s0, s1 + addw a0, a7, a6 + sw a0, 0(t6) + bge t5, a5, label1266 + add a2, a2, a3 + mv a0, t0 + mv a6, zero + mv a7, zero +.p2align 2 +label1090: + sh2add t6, a6, a2 + lw s0, 0(a0) + addiw a6, a6, 16 + lw s1, 0(t6) + lw s4, 4(a0) + lw s5, 4(t6) + mulw s6, s0, s1 + mulw s3, s4, s5 + lw s4, 8(a0) + addw s2, s3, s6 + lw s5, 8(t6) + lw s3, 12(a0) + mulw s1, s4, s5 + lw s5, 12(t6) + addw s0, s2, s1 + lw s2, 16(a0) + mulw s4, s3, s5 + lw s5, 16(t6) + addw s1, s0, s4 + lw s3, 20(a0) mulw s4, s2, s5 - lw s2, 80(a1) - slli s5, s6, 7 + lw s5, 20(t6) addw s0, s1, s4 - lui s6, 21 - add s1, a7, s5 - sh2add s3, a0, s1 - lw s4, 0(s3) + lw s4, 24(a0) + mulw s1, s3, s5 + lw s5, 24(t6) + addw s2, s0, s1 + lw s3, 28(a0) + mulw s0, s4, s5 + lw s4, 28(t6) + addw s1, s2, s0 + lw s2, 32(a0) + mulw s5, s3, s4 + lw s4, 32(t6) + addw s0, s1, s5 + lw s3, 36(a0) + lw s6, 36(t6) mulw s5, s2, s4 - lw s2, 84(a1) - addiw s4, s6, -2016 + lw s2, 40(a0) addw s1, s0, s5 - li s6, 1375 - add s0, a7, s4 - sh2add s3, a0, s0 - lw s4, 0(s3) - mulw s5, s2, s4 - lw s2, 88(a1) - slli s4, s6, 6 - addw s0, s1, s5 - lui s6, 22 - add s1, a7, s4 - sh2add s3, a0, s1 - lw s5, 0(s3) + mulw s4, s3, s6 + lw s5, 40(t6) + addw s0, s1, s4 + lw s3, 44(a0) mulw s4, s2, s5 - addiw s2, s6, 1888 + lw s5, 44(t6) addw s1, s0, s4 - add s3, a7, s2 - lw a7, 92(a1) - sh2add s0, a0, s3 - addiw a0, a0, 1 - lw s4, 0(s0) - mulw s2, a7, s4 - li a7, 1000 - addw a1, s1, s2 - sw a1, 0(a6) - bge a0, a7, label1003 - addi a6, a6, 4 - mv a1, t2 - mv s0, zero - mv s1, zero -.p2align 2 -label781: - mul s3, s0, a2 - lw s2, 0(a1) - addiw s0, s0, 16 - add a7, a3, s3 - sh2add s4, a0, a7 - add s6, a7, a2 - sh2add s7, a0, s6 - lw s3, 0(s4) - lw s5, 4(a1) - lw s9, 0(s7) - mulw s8, s2, s3 - add s7, a7, a4 - lw s2, 8(a1) - mulw s6, s5, s9 - sh2add s5, a0, s7 - addw s4, s6, s8 - add s6, a7, a5 - lw s8, 0(s5) - sh2add s5, a0, s6 - add s6, a7, t0 - mulw s7, s2, s8 - addw s3, s4, s7 - lw s4, 12(a1) - lw s8, 0(s5) - sh2add s5, a0, s6 - add s6, a7, t1 - mulw s7, s4, s8 - lw s4, 16(a1) - addw s2, s3, s7 - lw s7, 0(s5) - sh2add s5, a0, s6 - mulw s8, s4, s7 - lw s4, 20(a1) - addw s3, s2, s8 - lw s7, 0(s5) - li s8, 125 - li s5, 375 - mulw s6, s4, s7 - slli s7, s5, 6 - addw s2, s3, s6 - lw s5, 24(a1) - add s3, a7, s7 - sh2add s4, a0, s3 - lw s6, 0(s4) - li s4, 875 - mulw s7, s5, s6 - slli s6, s4, 5 - addw s3, s2, s7 - lw s4, 28(a1) - add s2, a7, s6 - sh2add s5, a0, s2 - lw s6, 0(s5) - mulw s7, s4, s6 - lw s4, 32(a1) - slli s6, s8, 8 - addw s2, s3, s7 - li s8, 1125 - add s3, a7, s6 - sh2add s5, a0, s3 - lw s6, 0(s5) - mulw s7, s4, s6 - lw s4, 36(a1) - slli s6, s8, 5 - addw s3, s2, s7 - li s8, 1375 - add s2, a7, s6 - sh2add s5, a0, s2 - lw s7, 0(s5) - li s5, 625 - mulw s6, s4, s7 - slli s7, s5, 6 - addw s2, s3, s6 - lw s5, 40(a1) - add s3, a7, s7 - sh2add s4, a0, s3 - lw s6, 0(s4) - mulw s7, s5, s6 - lw s5, 44(a1) - slli s6, s8, 5 - addw s3, s2, s7 - li s8, 875 - add s2, a7, s6 - sh2add s4, a0, s2 - lw s7, 0(s4) - li s4, 375 - mulw s6, s5, s7 - slli s7, s4, 7 - addw s2, s3, s6 - lw s4, 48(a1) - add s3, a7, s7 - sh2add s5, a0, s3 - lw s6, 0(s5) - li s5, 1625 - mulw s7, s4, s6 - slli s6, s5, 5 - addw s3, s2, s7 - lw s5, 52(a1) - add s4, a7, s6 - sh2add s2, a0, s4 - lw s7, 0(s2) - mulw s6, s5, s7 - slli s7, s8, 6 - addw s4, s3, s6 - li s8, 1875 - add s2, a7, s7 - lw s3, 56(a1) - sh2add s5, a0, s2 - lw s6, 0(s5) - mulw s7, s3, s6 - lw s3, 60(a1) - slli s6, s8, 5 - addw s2, s4, s7 - add s5, a7, s6 - sh2add s4, a0, s5 - lw s7, 0(s4) - mulw s6, s3, s7 - addw s5, s2, s6 - li s2, 992 - addw s1, s1, s5 - bge s0, s2, label786 - addi a1, a1, 64 - j label781 + lw s4, 48(a0) + mulw s2, s3, s5 + lw s3, 48(t6) + addw s0, s1, s2 + lw s2, 52(a0) + lw s5, 52(t6) + mulw s6, s4, s3 + lw s3, 56(a0) + addw s1, s0, s6 + mulw s4, s2, s5 + lw s5, 56(t6) + addw s0, s1, s4 + lw s2, 60(a0) + lw s4, 60(t6) + mulw s6, s3, s5 + addw s1, s0, s6 + mulw s3, s2, s4 + addw s0, s1, s3 + addw a7, a7, s0 + bge a6, a1, label1095 + addi a0, a0, 64 + j label1090 .p2align 2 -label1003: - addiw t6, t6, 1 - ble t4, t6, label790 - add t3, t3, a2 - mul a1, t6, a2 - mv a0, zero - mv s0, zero - mv s1, zero - mv a6, t3 - add t2, t5, a1 - mv a1, t2 - j label781 -label790: +label1266: + addiw t4, t4, 1 + ble t1, t4, label1099 + add a4, a4, a3 + mul a0, t4, a3 + mv a2, t2 + mv t5, zero + mv a6, zero + mv a7, zero + add t0, t3, a0 + mv a0, t0 + j label1090 +label1099: ld s0, 0(sp) ld s5, 8(sp) ld s1, 16(sp) ld s6, 24(sp) - ld s3, 32(sp) - ld s4, 40(sp) + ld s4, 32(sp) + ld s3, 40(sp) ld s2, 48(sp) - ld s7, 56(sp) - ld s9, 64(sp) - ld s8, 72(sp) - addi sp, sp, 80 + addi sp, sp, 56 ret .p2align 2 cmmc_parallel_body_2: mv t0, a0 mv a2, a1 addiw a4, a0, 3 -pcrel1192: +pcrel1409: auipc a5, %pcrel_hi(cmmc_parallel_body_payload_2) - ld a3, %pcrel_lo(pcrel1192)(a5) - addi a1, a5, %pcrel_lo(pcrel1192) + ld a3, %pcrel_lo(pcrel1409)(a5) + addi a1, a5, %pcrel_lo(pcrel1409) lw a0, 8(a1) - ble a2, a4, label1090 + ble a2, a4, label1307 addiw t1, t0, 15 addiw a4, a2, -3 addiw a5, a2, -18 - bge t1, a4, label1141 + bge t1, a4, label1358 sh2add a1, t0, a3 - j label1100 + j label1317 .p2align 2 -label1103: +label1320: addi a1, a1, 64 .p2align 2 -label1100: +label1317: sw a0, 0(a1) addiw t0, t0, 16 sw a0, 4(a1) @@ -1547,59 +1664,59 @@ label1100: sw a0, 52(a1) sw a0, 56(a1) sw a0, 60(a1) - bgt a5, t0, label1103 + bgt a5, t0, label1320 mv a1, t0 -label1104: - ble a4, a1, label1090 +label1321: + ble a4, a1, label1307 sh2add a5, a1, a3 -label1108: +label1325: sw a0, 0(a5) addiw a1, a1, 4 sw a0, 4(a5) sw a0, 8(a5) sw a0, 12(a5) - ble a4, a1, label1178 + ble a4, a1, label1395 addi a5, a5, 16 - j label1108 -label1178: + j label1325 +label1395: mv t0, a1 -label1090: - ble a2, t0, label1097 +label1307: + ble a2, t0, label1314 sh2add a1, t0, a3 - j label1093 -label1096: + j label1310 +label1313: addi a1, a1, 4 -label1093: +label1310: addiw t0, t0, 1 sw a0, 0(a1) - bgt a2, t0, label1096 -label1097: + bgt a2, t0, label1313 +label1314: ret -label1141: +label1358: mv a1, t0 mv t0, zero - j label1104 + j label1321 .p2align 2 cmmc_parallel_body_3: mv t0, a0 addiw a5, a0, 3 -pcrel1347: +pcrel1564: auipc a0, %pcrel_hi(cmmc_parallel_body_payload_3) - addi a2, a0, %pcrel_lo(pcrel1347) + addi a2, a0, %pcrel_lo(pcrel1564) ld a3, 8(a2) - ble a1, a5, label1235 + ble a1, a5, label1452 addiw a0, t0, 15 addiw a4, a1, -3 addiw a5, a1, -18 - bge a0, a4, label1242 + bge a0, a4, label1459 sh2add a0, t0, a3 mv t1, zero - j label1210 + j label1427 .p2align 2 -label1214: +label1431: addi a0, a0, 64 .p2align 2 -label1210: +label1427: lw t4, 0(a0) addiw t0, t0, 16 lw t5, 4(a0) @@ -1633,17 +1750,17 @@ label1210: lw t4, 60(a0) addw t2, t3, t6 addw t1, t2, t4 - bgt a5, t0, label1214 + bgt a5, t0, label1431 mv a5, t0 mv t2, t1 -label1195: - ble a4, a5, label1246 +label1412: + ble a4, a5, label1463 sh2add a0, a5, a3 mv t0, t2 - j label1204 -label1208: + j label1421 +label1425: addi a0, a0, 16 -label1204: +label1421: lw t1, 0(a0) addiw a5, a5, 4 lw t4, 4(a0) @@ -1653,39 +1770,39 @@ label1204: lw t3, 12(a0) addw t1, t2, t5 addw t0, t1, t3 - bgt a4, a5, label1208 + bgt a4, a5, label1425 mv a0, t0 mv a4, t0 mv t0, a5 -label1215: - ble a1, t0, label1324 +label1432: + ble a1, t0, label1541 sh2add a0, t0, a3 mv a3, a4 - j label1222 -label1226: + j label1439 +label1443: addi a0, a0, 4 -label1222: +label1439: lw a5, 0(a0) addiw t0, t0, 1 addw a3, a3, a5 - bgt a1, t0, label1226 -label1219: + bgt a1, t0, label1443 +label1436: amoadd.w.aqrl a1, a3, (a2) ret -label1246: +label1463: mv a0, t1 mv a4, t1 - j label1215 -label1242: + j label1432 +label1459: mv a5, t0 mv t2, zero mv t1, zero mv t0, zero - j label1195 -label1324: + j label1412 +label1541: mv a3, a0 - j label1219 -label1235: + j label1436 +label1452: mv a4, zero mv a0, zero - j label1215 + j label1432 diff --git a/tests/SysY2022/performance/matmul2.sy.ir b/tests/SysY2022/performance/matmul2.sy.ir index 4bc2f50e6..f140ebcc6 100644 --- a/tests/SysY2022/performance/matmul2.sy.ir +++ b/tests/SysY2022/performance/matmul2.sy.ir @@ -3,7 +3,7 @@ internal func @putint(i32) -> void { NoMemoryRead NoMemoryWrite }; internal func @starttime(i32) -> void { NoMemoryRead NoMemoryWrite }; internal func @stoptime(i32) -> void { NoMemoryRead NoMemoryWrite }; internal [1000 * [1000 * i32]]* @a, align 8; -internal [1000 * [1000 * i32]]* @b, align 8 { Flexible }; +internal [1000 * [1000 * i32]]* @b, align 8 { Flexible Transposed }; internal [1000 * [1000 * i32]]* @c, align 8 { Flexible }; func @main() -> i32 { NoRecurse Entry } { ^entry: @@ -550,743 +550,823 @@ func @main() -> i32 { NoRecurse Entry } { internal func @cmmcParallelFor(i32, i32, i8*) -> void { NoRecurse }; internal func @cmmc_parallel_body_0(i32 %0, i32 %1) -> void { NoRecurse ParallelBody AlignedParallelBody } { ^b: - [1000 * [1000 * i32]]* %2 = ptrcast [1000 * [1000 * i32]]* @b to [1000 * [1000 * i32]]*; - [1000 * [1000 * i32]]* %3 = ptrcast [1000 * [1000 * i32]]* @a to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %2 = ptrcast [1000 * [1000 * i32]]* @a to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %3 = ptrcast [1000 * [1000 * i32]]* @b to [1000 * [1000 * i32]]*; ubr ^b1; ^b1: - i32 %4 = phi [^b, i32 %0] [^while.body1, i32 %425]; - [1000 * i32]* %5 = getelementptr &([1000 * [1000 * i32]]* %2)[i64 0][i32 %4]; + i32 %4 = phi [^b, i32 %0] [^while.body1, i32 %528]; ubr ^while.body; ^while.body: - i32 %6 = phi [^b1, i32 0] [^while.body, i32 %263]; - [1000 * i32]* %7 = getelementptr &([1000 * [1000 * i32]]* %3)[i64 0][i32 %6]; - i32* %8 = getelementptr &([1000 * i32]* %7)[i64 0][i32 %4]; - i32 %9 = load i32* %8; - i32* %10 = getelementptr &([1000 * i32]* %5)[i64 0][i32 %6]; - store i32* %10 with i32 %9; - [1000 * i32]* %11 = getelementptr &([1000 * i32]* %7)[i64 1]; + i32 %5 = phi [^b1, i32 0] [^while.body, i32 %326]; + [1000 * i32]* %6 = getelementptr &([1000 * [1000 * i32]]* %2)[i64 0][i32 %5]; + i32* %7 = getelementptr &([1000 * i32]* %6)[i64 0][i32 %4]; + i32 %8 = load i32* %7; + [1000 * i32]* %9 = getelementptr &([1000 * [1000 * i32]]* %3)[i64 0][i32 %5]; + i32* %10 = getelementptr &([1000 * i32]* %9)[i64 0][i32 %4]; + store i32* %10 with i32 %8; + [1000 * i32]* %11 = getelementptr &([1000 * i32]* %6)[i64 1]; i32* %12 = getelementptr &([1000 * i32]* %11)[i64 0][i32 %4]; i32 %13 = load i32* %12; - i32* %14 = getelementptr &(i32* %10)[i64 1]; - store i32* %14 with i32 %13; - [1000 * i32]* %15 = getelementptr &([1000 * i32]* %7)[i64 2]; - i32* %16 = getelementptr &([1000 * i32]* %15)[i64 0][i32 %4]; - i32 %17 = load i32* %16; - i32* %18 = getelementptr &(i32* %10)[i64 2]; - store i32* %18 with i32 %17; - [1000 * i32]* %19 = getelementptr &([1000 * i32]* %7)[i64 3]; + [1000 * i32]* %14 = getelementptr &([1000 * i32]* %9)[i64 1]; + i32* %15 = getelementptr &([1000 * i32]* %14)[i64 0][i32 %4]; + store i32* %15 with i32 %13; + [1000 * i32]* %16 = getelementptr &([1000 * i32]* %6)[i64 2]; + i32* %17 = getelementptr &([1000 * i32]* %16)[i64 0][i32 %4]; + i32 %18 = load i32* %17; + [1000 * i32]* %19 = getelementptr &([1000 * i32]* %9)[i64 2]; i32* %20 = getelementptr &([1000 * i32]* %19)[i64 0][i32 %4]; - i32 %21 = load i32* %20; - i32* %22 = getelementptr &(i32* %10)[i64 3]; - store i32* %22 with i32 %21; - [1000 * i32]* %23 = getelementptr &([1000 * i32]* %7)[i64 4]; - i32* %24 = getelementptr &([1000 * i32]* %23)[i64 0][i32 %4]; - i32 %25 = load i32* %24; - i32* %26 = getelementptr &(i32* %10)[i64 4]; - store i32* %26 with i32 %25; - [1000 * i32]* %27 = getelementptr &([1000 * i32]* %7)[i64 5]; - i32* %28 = getelementptr &([1000 * i32]* %27)[i64 0][i32 %4]; - i32 %29 = load i32* %28; - i32* %30 = getelementptr &(i32* %10)[i64 5]; - store i32* %30 with i32 %29; - [1000 * i32]* %31 = getelementptr &([1000 * i32]* %7)[i64 6]; + store i32* %20 with i32 %18; + [1000 * i32]* %21 = getelementptr &([1000 * i32]* %6)[i64 3]; + i32* %22 = getelementptr &([1000 * i32]* %21)[i64 0][i32 %4]; + i32 %23 = load i32* %22; + [1000 * i32]* %24 = getelementptr &([1000 * i32]* %9)[i64 3]; + i32* %25 = getelementptr &([1000 * i32]* %24)[i64 0][i32 %4]; + store i32* %25 with i32 %23; + [1000 * i32]* %26 = getelementptr &([1000 * i32]* %6)[i64 4]; + i32* %27 = getelementptr &([1000 * i32]* %26)[i64 0][i32 %4]; + i32 %28 = load i32* %27; + [1000 * i32]* %29 = getelementptr &([1000 * i32]* %9)[i64 4]; + i32* %30 = getelementptr &([1000 * i32]* %29)[i64 0][i32 %4]; + store i32* %30 with i32 %28; + [1000 * i32]* %31 = getelementptr &([1000 * i32]* %6)[i64 5]; i32* %32 = getelementptr &([1000 * i32]* %31)[i64 0][i32 %4]; i32 %33 = load i32* %32; - i32* %34 = getelementptr &(i32* %10)[i64 6]; - store i32* %34 with i32 %33; - [1000 * i32]* %35 = getelementptr &([1000 * i32]* %7)[i64 7]; - i32* %36 = getelementptr &([1000 * i32]* %35)[i64 0][i32 %4]; - i32 %37 = load i32* %36; - i32* %38 = getelementptr &(i32* %10)[i64 7]; - store i32* %38 with i32 %37; - [1000 * i32]* %39 = getelementptr &([1000 * i32]* %7)[i64 8]; + [1000 * i32]* %34 = getelementptr &([1000 * i32]* %9)[i64 5]; + i32* %35 = getelementptr &([1000 * i32]* %34)[i64 0][i32 %4]; + store i32* %35 with i32 %33; + [1000 * i32]* %36 = getelementptr &([1000 * i32]* %6)[i64 6]; + i32* %37 = getelementptr &([1000 * i32]* %36)[i64 0][i32 %4]; + i32 %38 = load i32* %37; + [1000 * i32]* %39 = getelementptr &([1000 * i32]* %9)[i64 6]; i32* %40 = getelementptr &([1000 * i32]* %39)[i64 0][i32 %4]; - i32 %41 = load i32* %40; - i32* %42 = getelementptr &(i32* %10)[i64 8]; - store i32* %42 with i32 %41; - [1000 * i32]* %43 = getelementptr &([1000 * i32]* %7)[i64 9]; - i32* %44 = getelementptr &([1000 * i32]* %43)[i64 0][i32 %4]; - i32 %45 = load i32* %44; - i32* %46 = getelementptr &(i32* %10)[i64 9]; - store i32* %46 with i32 %45; - [1000 * i32]* %47 = getelementptr &([1000 * i32]* %7)[i64 10]; - i32* %48 = getelementptr &([1000 * i32]* %47)[i64 0][i32 %4]; - i32 %49 = load i32* %48; - i32* %50 = getelementptr &(i32* %10)[i64 10]; - store i32* %50 with i32 %49; - [1000 * i32]* %51 = getelementptr &([1000 * i32]* %7)[i64 11]; + store i32* %40 with i32 %38; + [1000 * i32]* %41 = getelementptr &([1000 * i32]* %6)[i64 7]; + i32* %42 = getelementptr &([1000 * i32]* %41)[i64 0][i32 %4]; + i32 %43 = load i32* %42; + [1000 * i32]* %44 = getelementptr &([1000 * i32]* %9)[i64 7]; + i32* %45 = getelementptr &([1000 * i32]* %44)[i64 0][i32 %4]; + store i32* %45 with i32 %43; + [1000 * i32]* %46 = getelementptr &([1000 * i32]* %6)[i64 8]; + i32* %47 = getelementptr &([1000 * i32]* %46)[i64 0][i32 %4]; + i32 %48 = load i32* %47; + [1000 * i32]* %49 = getelementptr &([1000 * i32]* %9)[i64 8]; + i32* %50 = getelementptr &([1000 * i32]* %49)[i64 0][i32 %4]; + store i32* %50 with i32 %48; + [1000 * i32]* %51 = getelementptr &([1000 * i32]* %6)[i64 9]; i32* %52 = getelementptr &([1000 * i32]* %51)[i64 0][i32 %4]; i32 %53 = load i32* %52; - i32* %54 = getelementptr &(i32* %10)[i64 11]; - store i32* %54 with i32 %53; - [1000 * i32]* %55 = getelementptr &([1000 * i32]* %7)[i64 12]; - i32* %56 = getelementptr &([1000 * i32]* %55)[i64 0][i32 %4]; - i32 %57 = load i32* %56; - i32* %58 = getelementptr &(i32* %10)[i64 12]; - store i32* %58 with i32 %57; - [1000 * i32]* %59 = getelementptr &([1000 * i32]* %7)[i64 13]; + [1000 * i32]* %54 = getelementptr &([1000 * i32]* %9)[i64 9]; + i32* %55 = getelementptr &([1000 * i32]* %54)[i64 0][i32 %4]; + store i32* %55 with i32 %53; + [1000 * i32]* %56 = getelementptr &([1000 * i32]* %6)[i64 10]; + i32* %57 = getelementptr &([1000 * i32]* %56)[i64 0][i32 %4]; + i32 %58 = load i32* %57; + [1000 * i32]* %59 = getelementptr &([1000 * i32]* %9)[i64 10]; i32* %60 = getelementptr &([1000 * i32]* %59)[i64 0][i32 %4]; - i32 %61 = load i32* %60; - i32* %62 = getelementptr &(i32* %10)[i64 13]; - store i32* %62 with i32 %61; - [1000 * i32]* %63 = getelementptr &([1000 * i32]* %7)[i64 14]; - i32* %64 = getelementptr &([1000 * i32]* %63)[i64 0][i32 %4]; - i32 %65 = load i32* %64; - i32* %66 = getelementptr &(i32* %10)[i64 14]; - store i32* %66 with i32 %65; - [1000 * i32]* %67 = getelementptr &([1000 * i32]* %7)[i64 15]; - i32* %68 = getelementptr &([1000 * i32]* %67)[i64 0][i32 %4]; - i32 %69 = load i32* %68; - i32* %70 = getelementptr &(i32* %10)[i64 15]; - store i32* %70 with i32 %69; - [1000 * i32]* %71 = getelementptr &([1000 * i32]* %7)[i64 16]; + store i32* %60 with i32 %58; + [1000 * i32]* %61 = getelementptr &([1000 * i32]* %6)[i64 11]; + i32* %62 = getelementptr &([1000 * i32]* %61)[i64 0][i32 %4]; + i32 %63 = load i32* %62; + [1000 * i32]* %64 = getelementptr &([1000 * i32]* %9)[i64 11]; + i32* %65 = getelementptr &([1000 * i32]* %64)[i64 0][i32 %4]; + store i32* %65 with i32 %63; + [1000 * i32]* %66 = getelementptr &([1000 * i32]* %6)[i64 12]; + i32* %67 = getelementptr &([1000 * i32]* %66)[i64 0][i32 %4]; + i32 %68 = load i32* %67; + [1000 * i32]* %69 = getelementptr &([1000 * i32]* %9)[i64 12]; + i32* %70 = getelementptr &([1000 * i32]* %69)[i64 0][i32 %4]; + store i32* %70 with i32 %68; + [1000 * i32]* %71 = getelementptr &([1000 * i32]* %6)[i64 13]; i32* %72 = getelementptr &([1000 * i32]* %71)[i64 0][i32 %4]; i32 %73 = load i32* %72; - i32* %74 = getelementptr &(i32* %10)[i64 16]; - store i32* %74 with i32 %73; - [1000 * i32]* %75 = getelementptr &([1000 * i32]* %7)[i64 17]; - i32* %76 = getelementptr &([1000 * i32]* %75)[i64 0][i32 %4]; - i32 %77 = load i32* %76; - i32* %78 = getelementptr &(i32* %10)[i64 17]; - store i32* %78 with i32 %77; - [1000 * i32]* %79 = getelementptr &([1000 * i32]* %7)[i64 18]; + [1000 * i32]* %74 = getelementptr &([1000 * i32]* %9)[i64 13]; + i32* %75 = getelementptr &([1000 * i32]* %74)[i64 0][i32 %4]; + store i32* %75 with i32 %73; + [1000 * i32]* %76 = getelementptr &([1000 * i32]* %6)[i64 14]; + i32* %77 = getelementptr &([1000 * i32]* %76)[i64 0][i32 %4]; + i32 %78 = load i32* %77; + [1000 * i32]* %79 = getelementptr &([1000 * i32]* %9)[i64 14]; i32* %80 = getelementptr &([1000 * i32]* %79)[i64 0][i32 %4]; - i32 %81 = load i32* %80; - i32* %82 = getelementptr &(i32* %10)[i64 18]; - store i32* %82 with i32 %81; - [1000 * i32]* %83 = getelementptr &([1000 * i32]* %7)[i64 19]; - i32* %84 = getelementptr &([1000 * i32]* %83)[i64 0][i32 %4]; - i32 %85 = load i32* %84; - i32* %86 = getelementptr &(i32* %10)[i64 19]; - store i32* %86 with i32 %85; - [1000 * i32]* %87 = getelementptr &([1000 * i32]* %7)[i64 20]; - i32* %88 = getelementptr &([1000 * i32]* %87)[i64 0][i32 %4]; - i32 %89 = load i32* %88; - i32* %90 = getelementptr &(i32* %10)[i64 20]; - store i32* %90 with i32 %89; - [1000 * i32]* %91 = getelementptr &([1000 * i32]* %7)[i64 21]; + store i32* %80 with i32 %78; + [1000 * i32]* %81 = getelementptr &([1000 * i32]* %6)[i64 15]; + i32* %82 = getelementptr &([1000 * i32]* %81)[i64 0][i32 %4]; + i32 %83 = load i32* %82; + [1000 * i32]* %84 = getelementptr &([1000 * i32]* %9)[i64 15]; + i32* %85 = getelementptr &([1000 * i32]* %84)[i64 0][i32 %4]; + store i32* %85 with i32 %83; + [1000 * i32]* %86 = getelementptr &([1000 * i32]* %6)[i64 16]; + i32* %87 = getelementptr &([1000 * i32]* %86)[i64 0][i32 %4]; + i32 %88 = load i32* %87; + [1000 * i32]* %89 = getelementptr &([1000 * i32]* %9)[i64 16]; + i32* %90 = getelementptr &([1000 * i32]* %89)[i64 0][i32 %4]; + store i32* %90 with i32 %88; + [1000 * i32]* %91 = getelementptr &([1000 * i32]* %6)[i64 17]; i32* %92 = getelementptr &([1000 * i32]* %91)[i64 0][i32 %4]; i32 %93 = load i32* %92; - i32* %94 = getelementptr &(i32* %10)[i64 21]; - store i32* %94 with i32 %93; - [1000 * i32]* %95 = getelementptr &([1000 * i32]* %7)[i64 22]; - i32* %96 = getelementptr &([1000 * i32]* %95)[i64 0][i32 %4]; - i32 %97 = load i32* %96; - i32* %98 = getelementptr &(i32* %10)[i64 22]; - store i32* %98 with i32 %97; - [1000 * i32]* %99 = getelementptr &([1000 * i32]* %7)[i64 23]; + [1000 * i32]* %94 = getelementptr &([1000 * i32]* %9)[i64 17]; + i32* %95 = getelementptr &([1000 * i32]* %94)[i64 0][i32 %4]; + store i32* %95 with i32 %93; + [1000 * i32]* %96 = getelementptr &([1000 * i32]* %6)[i64 18]; + i32* %97 = getelementptr &([1000 * i32]* %96)[i64 0][i32 %4]; + i32 %98 = load i32* %97; + [1000 * i32]* %99 = getelementptr &([1000 * i32]* %9)[i64 18]; i32* %100 = getelementptr &([1000 * i32]* %99)[i64 0][i32 %4]; - i32 %101 = load i32* %100; - i32* %102 = getelementptr &(i32* %10)[i64 23]; - store i32* %102 with i32 %101; - [1000 * i32]* %103 = getelementptr &([1000 * i32]* %7)[i64 24]; - i32* %104 = getelementptr &([1000 * i32]* %103)[i64 0][i32 %4]; - i32 %105 = load i32* %104; - i32* %106 = getelementptr &(i32* %10)[i64 24]; - store i32* %106 with i32 %105; - [1000 * i32]* %107 = getelementptr &([1000 * i32]* %7)[i64 25]; - i32* %108 = getelementptr &([1000 * i32]* %107)[i64 0][i32 %4]; - i32 %109 = load i32* %108; - i32* %110 = getelementptr &(i32* %10)[i64 25]; - store i32* %110 with i32 %109; - [1000 * i32]* %111 = getelementptr &([1000 * i32]* %7)[i64 26]; + store i32* %100 with i32 %98; + [1000 * i32]* %101 = getelementptr &([1000 * i32]* %6)[i64 19]; + i32* %102 = getelementptr &([1000 * i32]* %101)[i64 0][i32 %4]; + i32 %103 = load i32* %102; + [1000 * i32]* %104 = getelementptr &([1000 * i32]* %9)[i64 19]; + i32* %105 = getelementptr &([1000 * i32]* %104)[i64 0][i32 %4]; + store i32* %105 with i32 %103; + [1000 * i32]* %106 = getelementptr &([1000 * i32]* %6)[i64 20]; + i32* %107 = getelementptr &([1000 * i32]* %106)[i64 0][i32 %4]; + i32 %108 = load i32* %107; + [1000 * i32]* %109 = getelementptr &([1000 * i32]* %9)[i64 20]; + i32* %110 = getelementptr &([1000 * i32]* %109)[i64 0][i32 %4]; + store i32* %110 with i32 %108; + [1000 * i32]* %111 = getelementptr &([1000 * i32]* %6)[i64 21]; i32* %112 = getelementptr &([1000 * i32]* %111)[i64 0][i32 %4]; i32 %113 = load i32* %112; - i32* %114 = getelementptr &(i32* %10)[i64 26]; - store i32* %114 with i32 %113; - [1000 * i32]* %115 = getelementptr &([1000 * i32]* %7)[i64 27]; - i32* %116 = getelementptr &([1000 * i32]* %115)[i64 0][i32 %4]; - i32 %117 = load i32* %116; - i32* %118 = getelementptr &(i32* %10)[i64 27]; - store i32* %118 with i32 %117; - [1000 * i32]* %119 = getelementptr &([1000 * i32]* %7)[i64 28]; + [1000 * i32]* %114 = getelementptr &([1000 * i32]* %9)[i64 21]; + i32* %115 = getelementptr &([1000 * i32]* %114)[i64 0][i32 %4]; + store i32* %115 with i32 %113; + [1000 * i32]* %116 = getelementptr &([1000 * i32]* %6)[i64 22]; + i32* %117 = getelementptr &([1000 * i32]* %116)[i64 0][i32 %4]; + i32 %118 = load i32* %117; + [1000 * i32]* %119 = getelementptr &([1000 * i32]* %9)[i64 22]; i32* %120 = getelementptr &([1000 * i32]* %119)[i64 0][i32 %4]; - i32 %121 = load i32* %120; - i32* %122 = getelementptr &(i32* %10)[i64 28]; - store i32* %122 with i32 %121; - [1000 * i32]* %123 = getelementptr &([1000 * i32]* %7)[i64 29]; - i32* %124 = getelementptr &([1000 * i32]* %123)[i64 0][i32 %4]; - i32 %125 = load i32* %124; - i32* %126 = getelementptr &(i32* %10)[i64 29]; - store i32* %126 with i32 %125; - [1000 * i32]* %127 = getelementptr &([1000 * i32]* %7)[i64 30]; - i32* %128 = getelementptr &([1000 * i32]* %127)[i64 0][i32 %4]; - i32 %129 = load i32* %128; - i32* %130 = getelementptr &(i32* %10)[i64 30]; - store i32* %130 with i32 %129; - [1000 * i32]* %131 = getelementptr &([1000 * i32]* %7)[i64 31]; + store i32* %120 with i32 %118; + [1000 * i32]* %121 = getelementptr &([1000 * i32]* %6)[i64 23]; + i32* %122 = getelementptr &([1000 * i32]* %121)[i64 0][i32 %4]; + i32 %123 = load i32* %122; + [1000 * i32]* %124 = getelementptr &([1000 * i32]* %9)[i64 23]; + i32* %125 = getelementptr &([1000 * i32]* %124)[i64 0][i32 %4]; + store i32* %125 with i32 %123; + [1000 * i32]* %126 = getelementptr &([1000 * i32]* %6)[i64 24]; + i32* %127 = getelementptr &([1000 * i32]* %126)[i64 0][i32 %4]; + i32 %128 = load i32* %127; + [1000 * i32]* %129 = getelementptr &([1000 * i32]* %9)[i64 24]; + i32* %130 = getelementptr &([1000 * i32]* %129)[i64 0][i32 %4]; + store i32* %130 with i32 %128; + [1000 * i32]* %131 = getelementptr &([1000 * i32]* %6)[i64 25]; i32* %132 = getelementptr &([1000 * i32]* %131)[i64 0][i32 %4]; i32 %133 = load i32* %132; - i32* %134 = getelementptr &(i32* %10)[i64 31]; - store i32* %134 with i32 %133; - [1000 * i32]* %135 = getelementptr &([1000 * i32]* %7)[i64 32]; - i32* %136 = getelementptr &([1000 * i32]* %135)[i64 0][i32 %4]; - i32 %137 = load i32* %136; - i32* %138 = getelementptr &(i32* %10)[i64 32]; - store i32* %138 with i32 %137; - [1000 * i32]* %139 = getelementptr &([1000 * i32]* %7)[i64 33]; + [1000 * i32]* %134 = getelementptr &([1000 * i32]* %9)[i64 25]; + i32* %135 = getelementptr &([1000 * i32]* %134)[i64 0][i32 %4]; + store i32* %135 with i32 %133; + [1000 * i32]* %136 = getelementptr &([1000 * i32]* %6)[i64 26]; + i32* %137 = getelementptr &([1000 * i32]* %136)[i64 0][i32 %4]; + i32 %138 = load i32* %137; + [1000 * i32]* %139 = getelementptr &([1000 * i32]* %9)[i64 26]; i32* %140 = getelementptr &([1000 * i32]* %139)[i64 0][i32 %4]; - i32 %141 = load i32* %140; - i32* %142 = getelementptr &(i32* %10)[i64 33]; - store i32* %142 with i32 %141; - [1000 * i32]* %143 = getelementptr &([1000 * i32]* %7)[i64 34]; - i32* %144 = getelementptr &([1000 * i32]* %143)[i64 0][i32 %4]; - i32 %145 = load i32* %144; - i32* %146 = getelementptr &(i32* %10)[i64 34]; - store i32* %146 with i32 %145; - [1000 * i32]* %147 = getelementptr &([1000 * i32]* %7)[i64 35]; - i32* %148 = getelementptr &([1000 * i32]* %147)[i64 0][i32 %4]; - i32 %149 = load i32* %148; - i32* %150 = getelementptr &(i32* %10)[i64 35]; - store i32* %150 with i32 %149; - [1000 * i32]* %151 = getelementptr &([1000 * i32]* %7)[i64 36]; + store i32* %140 with i32 %138; + [1000 * i32]* %141 = getelementptr &([1000 * i32]* %6)[i64 27]; + i32* %142 = getelementptr &([1000 * i32]* %141)[i64 0][i32 %4]; + i32 %143 = load i32* %142; + [1000 * i32]* %144 = getelementptr &([1000 * i32]* %9)[i64 27]; + i32* %145 = getelementptr &([1000 * i32]* %144)[i64 0][i32 %4]; + store i32* %145 with i32 %143; + [1000 * i32]* %146 = getelementptr &([1000 * i32]* %6)[i64 28]; + i32* %147 = getelementptr &([1000 * i32]* %146)[i64 0][i32 %4]; + i32 %148 = load i32* %147; + [1000 * i32]* %149 = getelementptr &([1000 * i32]* %9)[i64 28]; + i32* %150 = getelementptr &([1000 * i32]* %149)[i64 0][i32 %4]; + store i32* %150 with i32 %148; + [1000 * i32]* %151 = getelementptr &([1000 * i32]* %6)[i64 29]; i32* %152 = getelementptr &([1000 * i32]* %151)[i64 0][i32 %4]; i32 %153 = load i32* %152; - i32* %154 = getelementptr &(i32* %10)[i64 36]; - store i32* %154 with i32 %153; - [1000 * i32]* %155 = getelementptr &([1000 * i32]* %7)[i64 37]; - i32* %156 = getelementptr &([1000 * i32]* %155)[i64 0][i32 %4]; - i32 %157 = load i32* %156; - i32* %158 = getelementptr &(i32* %10)[i64 37]; - store i32* %158 with i32 %157; - [1000 * i32]* %159 = getelementptr &([1000 * i32]* %7)[i64 38]; + [1000 * i32]* %154 = getelementptr &([1000 * i32]* %9)[i64 29]; + i32* %155 = getelementptr &([1000 * i32]* %154)[i64 0][i32 %4]; + store i32* %155 with i32 %153; + [1000 * i32]* %156 = getelementptr &([1000 * i32]* %6)[i64 30]; + i32* %157 = getelementptr &([1000 * i32]* %156)[i64 0][i32 %4]; + i32 %158 = load i32* %157; + [1000 * i32]* %159 = getelementptr &([1000 * i32]* %9)[i64 30]; i32* %160 = getelementptr &([1000 * i32]* %159)[i64 0][i32 %4]; - i32 %161 = load i32* %160; - i32* %162 = getelementptr &(i32* %10)[i64 38]; - store i32* %162 with i32 %161; - [1000 * i32]* %163 = getelementptr &([1000 * i32]* %7)[i64 39]; - i32* %164 = getelementptr &([1000 * i32]* %163)[i64 0][i32 %4]; - i32 %165 = load i32* %164; - i32* %166 = getelementptr &(i32* %10)[i64 39]; - store i32* %166 with i32 %165; - [1000 * i32]* %167 = getelementptr &([1000 * i32]* %7)[i64 40]; - i32* %168 = getelementptr &([1000 * i32]* %167)[i64 0][i32 %4]; - i32 %169 = load i32* %168; - i32* %170 = getelementptr &(i32* %10)[i64 40]; - store i32* %170 with i32 %169; - [1000 * i32]* %171 = getelementptr &([1000 * i32]* %7)[i64 41]; + store i32* %160 with i32 %158; + [1000 * i32]* %161 = getelementptr &([1000 * i32]* %6)[i64 31]; + i32* %162 = getelementptr &([1000 * i32]* %161)[i64 0][i32 %4]; + i32 %163 = load i32* %162; + [1000 * i32]* %164 = getelementptr &([1000 * i32]* %9)[i64 31]; + i32* %165 = getelementptr &([1000 * i32]* %164)[i64 0][i32 %4]; + store i32* %165 with i32 %163; + [1000 * i32]* %166 = getelementptr &([1000 * i32]* %6)[i64 32]; + i32* %167 = getelementptr &([1000 * i32]* %166)[i64 0][i32 %4]; + i32 %168 = load i32* %167; + [1000 * i32]* %169 = getelementptr &([1000 * i32]* %9)[i64 32]; + i32* %170 = getelementptr &([1000 * i32]* %169)[i64 0][i32 %4]; + store i32* %170 with i32 %168; + [1000 * i32]* %171 = getelementptr &([1000 * i32]* %6)[i64 33]; i32* %172 = getelementptr &([1000 * i32]* %171)[i64 0][i32 %4]; i32 %173 = load i32* %172; - i32* %174 = getelementptr &(i32* %10)[i64 41]; - store i32* %174 with i32 %173; - [1000 * i32]* %175 = getelementptr &([1000 * i32]* %7)[i64 42]; - i32* %176 = getelementptr &([1000 * i32]* %175)[i64 0][i32 %4]; - i32 %177 = load i32* %176; - i32* %178 = getelementptr &(i32* %10)[i64 42]; - store i32* %178 with i32 %177; - [1000 * i32]* %179 = getelementptr &([1000 * i32]* %7)[i64 43]; + [1000 * i32]* %174 = getelementptr &([1000 * i32]* %9)[i64 33]; + i32* %175 = getelementptr &([1000 * i32]* %174)[i64 0][i32 %4]; + store i32* %175 with i32 %173; + [1000 * i32]* %176 = getelementptr &([1000 * i32]* %6)[i64 34]; + i32* %177 = getelementptr &([1000 * i32]* %176)[i64 0][i32 %4]; + i32 %178 = load i32* %177; + [1000 * i32]* %179 = getelementptr &([1000 * i32]* %9)[i64 34]; i32* %180 = getelementptr &([1000 * i32]* %179)[i64 0][i32 %4]; - i32 %181 = load i32* %180; - i32* %182 = getelementptr &(i32* %10)[i64 43]; - store i32* %182 with i32 %181; - [1000 * i32]* %183 = getelementptr &([1000 * i32]* %7)[i64 44]; - i32* %184 = getelementptr &([1000 * i32]* %183)[i64 0][i32 %4]; - i32 %185 = load i32* %184; - i32* %186 = getelementptr &(i32* %10)[i64 44]; - store i32* %186 with i32 %185; - [1000 * i32]* %187 = getelementptr &([1000 * i32]* %7)[i64 45]; - i32* %188 = getelementptr &([1000 * i32]* %187)[i64 0][i32 %4]; - i32 %189 = load i32* %188; - i32* %190 = getelementptr &(i32* %10)[i64 45]; - store i32* %190 with i32 %189; - [1000 * i32]* %191 = getelementptr &([1000 * i32]* %7)[i64 46]; + store i32* %180 with i32 %178; + [1000 * i32]* %181 = getelementptr &([1000 * i32]* %6)[i64 35]; + i32* %182 = getelementptr &([1000 * i32]* %181)[i64 0][i32 %4]; + i32 %183 = load i32* %182; + [1000 * i32]* %184 = getelementptr &([1000 * i32]* %9)[i64 35]; + i32* %185 = getelementptr &([1000 * i32]* %184)[i64 0][i32 %4]; + store i32* %185 with i32 %183; + [1000 * i32]* %186 = getelementptr &([1000 * i32]* %6)[i64 36]; + i32* %187 = getelementptr &([1000 * i32]* %186)[i64 0][i32 %4]; + i32 %188 = load i32* %187; + [1000 * i32]* %189 = getelementptr &([1000 * i32]* %9)[i64 36]; + i32* %190 = getelementptr &([1000 * i32]* %189)[i64 0][i32 %4]; + store i32* %190 with i32 %188; + [1000 * i32]* %191 = getelementptr &([1000 * i32]* %6)[i64 37]; i32* %192 = getelementptr &([1000 * i32]* %191)[i64 0][i32 %4]; i32 %193 = load i32* %192; - i32* %194 = getelementptr &(i32* %10)[i64 46]; - store i32* %194 with i32 %193; - [1000 * i32]* %195 = getelementptr &([1000 * i32]* %7)[i64 47]; - i32* %196 = getelementptr &([1000 * i32]* %195)[i64 0][i32 %4]; - i32 %197 = load i32* %196; - i32* %198 = getelementptr &(i32* %10)[i64 47]; - store i32* %198 with i32 %197; - [1000 * i32]* %199 = getelementptr &([1000 * i32]* %7)[i64 48]; + [1000 * i32]* %194 = getelementptr &([1000 * i32]* %9)[i64 37]; + i32* %195 = getelementptr &([1000 * i32]* %194)[i64 0][i32 %4]; + store i32* %195 with i32 %193; + [1000 * i32]* %196 = getelementptr &([1000 * i32]* %6)[i64 38]; + i32* %197 = getelementptr &([1000 * i32]* %196)[i64 0][i32 %4]; + i32 %198 = load i32* %197; + [1000 * i32]* %199 = getelementptr &([1000 * i32]* %9)[i64 38]; i32* %200 = getelementptr &([1000 * i32]* %199)[i64 0][i32 %4]; - i32 %201 = load i32* %200; - i32* %202 = getelementptr &(i32* %10)[i64 48]; - store i32* %202 with i32 %201; - [1000 * i32]* %203 = getelementptr &([1000 * i32]* %7)[i64 49]; - i32* %204 = getelementptr &([1000 * i32]* %203)[i64 0][i32 %4]; - i32 %205 = load i32* %204; - i32* %206 = getelementptr &(i32* %10)[i64 49]; - store i32* %206 with i32 %205; - [1000 * i32]* %207 = getelementptr &([1000 * i32]* %7)[i64 50]; - i32* %208 = getelementptr &([1000 * i32]* %207)[i64 0][i32 %4]; - i32 %209 = load i32* %208; - i32* %210 = getelementptr &(i32* %10)[i64 50]; - store i32* %210 with i32 %209; - [1000 * i32]* %211 = getelementptr &([1000 * i32]* %7)[i64 51]; + store i32* %200 with i32 %198; + [1000 * i32]* %201 = getelementptr &([1000 * i32]* %6)[i64 39]; + i32* %202 = getelementptr &([1000 * i32]* %201)[i64 0][i32 %4]; + i32 %203 = load i32* %202; + [1000 * i32]* %204 = getelementptr &([1000 * i32]* %9)[i64 39]; + i32* %205 = getelementptr &([1000 * i32]* %204)[i64 0][i32 %4]; + store i32* %205 with i32 %203; + [1000 * i32]* %206 = getelementptr &([1000 * i32]* %6)[i64 40]; + i32* %207 = getelementptr &([1000 * i32]* %206)[i64 0][i32 %4]; + i32 %208 = load i32* %207; + [1000 * i32]* %209 = getelementptr &([1000 * i32]* %9)[i64 40]; + i32* %210 = getelementptr &([1000 * i32]* %209)[i64 0][i32 %4]; + store i32* %210 with i32 %208; + [1000 * i32]* %211 = getelementptr &([1000 * i32]* %6)[i64 41]; i32* %212 = getelementptr &([1000 * i32]* %211)[i64 0][i32 %4]; i32 %213 = load i32* %212; - i32* %214 = getelementptr &(i32* %10)[i64 51]; - store i32* %214 with i32 %213; - [1000 * i32]* %215 = getelementptr &([1000 * i32]* %7)[i64 52]; - i32* %216 = getelementptr &([1000 * i32]* %215)[i64 0][i32 %4]; - i32 %217 = load i32* %216; - i32* %218 = getelementptr &(i32* %10)[i64 52]; - store i32* %218 with i32 %217; - [1000 * i32]* %219 = getelementptr &([1000 * i32]* %7)[i64 53]; + [1000 * i32]* %214 = getelementptr &([1000 * i32]* %9)[i64 41]; + i32* %215 = getelementptr &([1000 * i32]* %214)[i64 0][i32 %4]; + store i32* %215 with i32 %213; + [1000 * i32]* %216 = getelementptr &([1000 * i32]* %6)[i64 42]; + i32* %217 = getelementptr &([1000 * i32]* %216)[i64 0][i32 %4]; + i32 %218 = load i32* %217; + [1000 * i32]* %219 = getelementptr &([1000 * i32]* %9)[i64 42]; i32* %220 = getelementptr &([1000 * i32]* %219)[i64 0][i32 %4]; - i32 %221 = load i32* %220; - i32* %222 = getelementptr &(i32* %10)[i64 53]; - store i32* %222 with i32 %221; - [1000 * i32]* %223 = getelementptr &([1000 * i32]* %7)[i64 54]; - i32* %224 = getelementptr &([1000 * i32]* %223)[i64 0][i32 %4]; - i32 %225 = load i32* %224; - i32* %226 = getelementptr &(i32* %10)[i64 54]; - store i32* %226 with i32 %225; - [1000 * i32]* %227 = getelementptr &([1000 * i32]* %7)[i64 55]; - i32* %228 = getelementptr &([1000 * i32]* %227)[i64 0][i32 %4]; - i32 %229 = load i32* %228; - i32* %230 = getelementptr &(i32* %10)[i64 55]; - store i32* %230 with i32 %229; - [1000 * i32]* %231 = getelementptr &([1000 * i32]* %7)[i64 56]; + store i32* %220 with i32 %218; + [1000 * i32]* %221 = getelementptr &([1000 * i32]* %6)[i64 43]; + i32* %222 = getelementptr &([1000 * i32]* %221)[i64 0][i32 %4]; + i32 %223 = load i32* %222; + [1000 * i32]* %224 = getelementptr &([1000 * i32]* %9)[i64 43]; + i32* %225 = getelementptr &([1000 * i32]* %224)[i64 0][i32 %4]; + store i32* %225 with i32 %223; + [1000 * i32]* %226 = getelementptr &([1000 * i32]* %6)[i64 44]; + i32* %227 = getelementptr &([1000 * i32]* %226)[i64 0][i32 %4]; + i32 %228 = load i32* %227; + [1000 * i32]* %229 = getelementptr &([1000 * i32]* %9)[i64 44]; + i32* %230 = getelementptr &([1000 * i32]* %229)[i64 0][i32 %4]; + store i32* %230 with i32 %228; + [1000 * i32]* %231 = getelementptr &([1000 * i32]* %6)[i64 45]; i32* %232 = getelementptr &([1000 * i32]* %231)[i64 0][i32 %4]; i32 %233 = load i32* %232; - i32* %234 = getelementptr &(i32* %10)[i64 56]; - store i32* %234 with i32 %233; - [1000 * i32]* %235 = getelementptr &([1000 * i32]* %7)[i64 57]; - i32* %236 = getelementptr &([1000 * i32]* %235)[i64 0][i32 %4]; - i32 %237 = load i32* %236; - i32* %238 = getelementptr &(i32* %10)[i64 57]; - store i32* %238 with i32 %237; - [1000 * i32]* %239 = getelementptr &([1000 * i32]* %7)[i64 58]; + [1000 * i32]* %234 = getelementptr &([1000 * i32]* %9)[i64 45]; + i32* %235 = getelementptr &([1000 * i32]* %234)[i64 0][i32 %4]; + store i32* %235 with i32 %233; + [1000 * i32]* %236 = getelementptr &([1000 * i32]* %6)[i64 46]; + i32* %237 = getelementptr &([1000 * i32]* %236)[i64 0][i32 %4]; + i32 %238 = load i32* %237; + [1000 * i32]* %239 = getelementptr &([1000 * i32]* %9)[i64 46]; i32* %240 = getelementptr &([1000 * i32]* %239)[i64 0][i32 %4]; - i32 %241 = load i32* %240; - i32* %242 = getelementptr &(i32* %10)[i64 58]; - store i32* %242 with i32 %241; - [1000 * i32]* %243 = getelementptr &([1000 * i32]* %7)[i64 59]; - i32* %244 = getelementptr &([1000 * i32]* %243)[i64 0][i32 %4]; - i32 %245 = load i32* %244; - i32* %246 = getelementptr &(i32* %10)[i64 59]; - store i32* %246 with i32 %245; - [1000 * i32]* %247 = getelementptr &([1000 * i32]* %7)[i64 60]; - i32* %248 = getelementptr &([1000 * i32]* %247)[i64 0][i32 %4]; - i32 %249 = load i32* %248; - i32* %250 = getelementptr &(i32* %10)[i64 60]; - store i32* %250 with i32 %249; - [1000 * i32]* %251 = getelementptr &([1000 * i32]* %7)[i64 61]; + store i32* %240 with i32 %238; + [1000 * i32]* %241 = getelementptr &([1000 * i32]* %6)[i64 47]; + i32* %242 = getelementptr &([1000 * i32]* %241)[i64 0][i32 %4]; + i32 %243 = load i32* %242; + [1000 * i32]* %244 = getelementptr &([1000 * i32]* %9)[i64 47]; + i32* %245 = getelementptr &([1000 * i32]* %244)[i64 0][i32 %4]; + store i32* %245 with i32 %243; + [1000 * i32]* %246 = getelementptr &([1000 * i32]* %6)[i64 48]; + i32* %247 = getelementptr &([1000 * i32]* %246)[i64 0][i32 %4]; + i32 %248 = load i32* %247; + [1000 * i32]* %249 = getelementptr &([1000 * i32]* %9)[i64 48]; + i32* %250 = getelementptr &([1000 * i32]* %249)[i64 0][i32 %4]; + store i32* %250 with i32 %248; + [1000 * i32]* %251 = getelementptr &([1000 * i32]* %6)[i64 49]; i32* %252 = getelementptr &([1000 * i32]* %251)[i64 0][i32 %4]; i32 %253 = load i32* %252; - i32* %254 = getelementptr &(i32* %10)[i64 61]; - store i32* %254 with i32 %253; - [1000 * i32]* %255 = getelementptr &([1000 * i32]* %7)[i64 62]; - i32* %256 = getelementptr &([1000 * i32]* %255)[i64 0][i32 %4]; - i32 %257 = load i32* %256; - i32* %258 = getelementptr &(i32* %10)[i64 62]; - store i32* %258 with i32 %257; - [1000 * i32]* %259 = getelementptr &([1000 * i32]* %7)[i64 63]; + [1000 * i32]* %254 = getelementptr &([1000 * i32]* %9)[i64 49]; + i32* %255 = getelementptr &([1000 * i32]* %254)[i64 0][i32 %4]; + store i32* %255 with i32 %253; + [1000 * i32]* %256 = getelementptr &([1000 * i32]* %6)[i64 50]; + i32* %257 = getelementptr &([1000 * i32]* %256)[i64 0][i32 %4]; + i32 %258 = load i32* %257; + [1000 * i32]* %259 = getelementptr &([1000 * i32]* %9)[i64 50]; i32* %260 = getelementptr &([1000 * i32]* %259)[i64 0][i32 %4]; - i32 %261 = load i32* %260; - i32* %262 = getelementptr &(i32* %10)[i64 63]; - store i32* %262 with i32 %261; - i32 %263 = add i32 %6, i32 64; - i1 %264 = icmp slt i32 %263, i32 960; - cbr i1 %264(prob = 0.933333), ^while.body, ^while.body1; - ^while.body1: - [1000 * i32]* %265 = getelementptr &([1000 * i32]* %7)[i64 64]; - i32* %266 = getelementptr &([1000 * i32]* %265)[i64 0][i32 %4]; - i32 %267 = load i32* %266; - i32* %268 = getelementptr &(i32* %10)[i64 64]; - store i32* %268 with i32 %267; - [1000 * i32]* %269 = getelementptr &([1000 * i32]* %7)[i64 65]; + store i32* %260 with i32 %258; + [1000 * i32]* %261 = getelementptr &([1000 * i32]* %6)[i64 51]; + i32* %262 = getelementptr &([1000 * i32]* %261)[i64 0][i32 %4]; + i32 %263 = load i32* %262; + [1000 * i32]* %264 = getelementptr &([1000 * i32]* %9)[i64 51]; + i32* %265 = getelementptr &([1000 * i32]* %264)[i64 0][i32 %4]; + store i32* %265 with i32 %263; + [1000 * i32]* %266 = getelementptr &([1000 * i32]* %6)[i64 52]; + i32* %267 = getelementptr &([1000 * i32]* %266)[i64 0][i32 %4]; + i32 %268 = load i32* %267; + [1000 * i32]* %269 = getelementptr &([1000 * i32]* %9)[i64 52]; i32* %270 = getelementptr &([1000 * i32]* %269)[i64 0][i32 %4]; - i32 %271 = load i32* %270; - i32* %272 = getelementptr &(i32* %10)[i64 65]; - store i32* %272 with i32 %271; - [1000 * i32]* %273 = getelementptr &([1000 * i32]* %7)[i64 66]; - i32* %274 = getelementptr &([1000 * i32]* %273)[i64 0][i32 %4]; - i32 %275 = load i32* %274; - i32* %276 = getelementptr &(i32* %10)[i64 66]; - store i32* %276 with i32 %275; - [1000 * i32]* %277 = getelementptr &([1000 * i32]* %7)[i64 67]; - i32* %278 = getelementptr &([1000 * i32]* %277)[i64 0][i32 %4]; - i32 %279 = load i32* %278; - i32* %280 = getelementptr &(i32* %10)[i64 67]; - store i32* %280 with i32 %279; - [1000 * i32]* %281 = getelementptr &([1000 * i32]* %7)[i64 68]; + store i32* %270 with i32 %268; + [1000 * i32]* %271 = getelementptr &([1000 * i32]* %6)[i64 53]; + i32* %272 = getelementptr &([1000 * i32]* %271)[i64 0][i32 %4]; + i32 %273 = load i32* %272; + [1000 * i32]* %274 = getelementptr &([1000 * i32]* %9)[i64 53]; + i32* %275 = getelementptr &([1000 * i32]* %274)[i64 0][i32 %4]; + store i32* %275 with i32 %273; + [1000 * i32]* %276 = getelementptr &([1000 * i32]* %6)[i64 54]; + i32* %277 = getelementptr &([1000 * i32]* %276)[i64 0][i32 %4]; + i32 %278 = load i32* %277; + [1000 * i32]* %279 = getelementptr &([1000 * i32]* %9)[i64 54]; + i32* %280 = getelementptr &([1000 * i32]* %279)[i64 0][i32 %4]; + store i32* %280 with i32 %278; + [1000 * i32]* %281 = getelementptr &([1000 * i32]* %6)[i64 55]; i32* %282 = getelementptr &([1000 * i32]* %281)[i64 0][i32 %4]; i32 %283 = load i32* %282; - i32* %284 = getelementptr &(i32* %10)[i64 68]; - store i32* %284 with i32 %283; - [1000 * i32]* %285 = getelementptr &([1000 * i32]* %7)[i64 69]; - i32* %286 = getelementptr &([1000 * i32]* %285)[i64 0][i32 %4]; - i32 %287 = load i32* %286; - i32* %288 = getelementptr &(i32* %10)[i64 69]; - store i32* %288 with i32 %287; - [1000 * i32]* %289 = getelementptr &([1000 * i32]* %7)[i64 70]; + [1000 * i32]* %284 = getelementptr &([1000 * i32]* %9)[i64 55]; + i32* %285 = getelementptr &([1000 * i32]* %284)[i64 0][i32 %4]; + store i32* %285 with i32 %283; + [1000 * i32]* %286 = getelementptr &([1000 * i32]* %6)[i64 56]; + i32* %287 = getelementptr &([1000 * i32]* %286)[i64 0][i32 %4]; + i32 %288 = load i32* %287; + [1000 * i32]* %289 = getelementptr &([1000 * i32]* %9)[i64 56]; i32* %290 = getelementptr &([1000 * i32]* %289)[i64 0][i32 %4]; - i32 %291 = load i32* %290; - i32* %292 = getelementptr &(i32* %10)[i64 70]; - store i32* %292 with i32 %291; - [1000 * i32]* %293 = getelementptr &([1000 * i32]* %7)[i64 71]; - i32* %294 = getelementptr &([1000 * i32]* %293)[i64 0][i32 %4]; - i32 %295 = load i32* %294; - i32* %296 = getelementptr &(i32* %10)[i64 71]; - store i32* %296 with i32 %295; - [1000 * i32]* %297 = getelementptr &([1000 * i32]* %7)[i64 72]; - i32* %298 = getelementptr &([1000 * i32]* %297)[i64 0][i32 %4]; - i32 %299 = load i32* %298; - i32* %300 = getelementptr &(i32* %10)[i64 72]; - store i32* %300 with i32 %299; - [1000 * i32]* %301 = getelementptr &([1000 * i32]* %7)[i64 73]; + store i32* %290 with i32 %288; + [1000 * i32]* %291 = getelementptr &([1000 * i32]* %6)[i64 57]; + i32* %292 = getelementptr &([1000 * i32]* %291)[i64 0][i32 %4]; + i32 %293 = load i32* %292; + [1000 * i32]* %294 = getelementptr &([1000 * i32]* %9)[i64 57]; + i32* %295 = getelementptr &([1000 * i32]* %294)[i64 0][i32 %4]; + store i32* %295 with i32 %293; + [1000 * i32]* %296 = getelementptr &([1000 * i32]* %6)[i64 58]; + i32* %297 = getelementptr &([1000 * i32]* %296)[i64 0][i32 %4]; + i32 %298 = load i32* %297; + [1000 * i32]* %299 = getelementptr &([1000 * i32]* %9)[i64 58]; + i32* %300 = getelementptr &([1000 * i32]* %299)[i64 0][i32 %4]; + store i32* %300 with i32 %298; + [1000 * i32]* %301 = getelementptr &([1000 * i32]* %6)[i64 59]; i32* %302 = getelementptr &([1000 * i32]* %301)[i64 0][i32 %4]; i32 %303 = load i32* %302; - i32* %304 = getelementptr &(i32* %10)[i64 73]; - store i32* %304 with i32 %303; - [1000 * i32]* %305 = getelementptr &([1000 * i32]* %7)[i64 74]; - i32* %306 = getelementptr &([1000 * i32]* %305)[i64 0][i32 %4]; - i32 %307 = load i32* %306; - i32* %308 = getelementptr &(i32* %10)[i64 74]; - store i32* %308 with i32 %307; - [1000 * i32]* %309 = getelementptr &([1000 * i32]* %7)[i64 75]; + [1000 * i32]* %304 = getelementptr &([1000 * i32]* %9)[i64 59]; + i32* %305 = getelementptr &([1000 * i32]* %304)[i64 0][i32 %4]; + store i32* %305 with i32 %303; + [1000 * i32]* %306 = getelementptr &([1000 * i32]* %6)[i64 60]; + i32* %307 = getelementptr &([1000 * i32]* %306)[i64 0][i32 %4]; + i32 %308 = load i32* %307; + [1000 * i32]* %309 = getelementptr &([1000 * i32]* %9)[i64 60]; i32* %310 = getelementptr &([1000 * i32]* %309)[i64 0][i32 %4]; - i32 %311 = load i32* %310; - i32* %312 = getelementptr &(i32* %10)[i64 75]; - store i32* %312 with i32 %311; - [1000 * i32]* %313 = getelementptr &([1000 * i32]* %7)[i64 76]; - i32* %314 = getelementptr &([1000 * i32]* %313)[i64 0][i32 %4]; - i32 %315 = load i32* %314; - i32* %316 = getelementptr &(i32* %10)[i64 76]; - store i32* %316 with i32 %315; - [1000 * i32]* %317 = getelementptr &([1000 * i32]* %7)[i64 77]; - i32* %318 = getelementptr &([1000 * i32]* %317)[i64 0][i32 %4]; - i32 %319 = load i32* %318; - i32* %320 = getelementptr &(i32* %10)[i64 77]; - store i32* %320 with i32 %319; - [1000 * i32]* %321 = getelementptr &([1000 * i32]* %7)[i64 78]; + store i32* %310 with i32 %308; + [1000 * i32]* %311 = getelementptr &([1000 * i32]* %6)[i64 61]; + i32* %312 = getelementptr &([1000 * i32]* %311)[i64 0][i32 %4]; + i32 %313 = load i32* %312; + [1000 * i32]* %314 = getelementptr &([1000 * i32]* %9)[i64 61]; + i32* %315 = getelementptr &([1000 * i32]* %314)[i64 0][i32 %4]; + store i32* %315 with i32 %313; + [1000 * i32]* %316 = getelementptr &([1000 * i32]* %6)[i64 62]; + i32* %317 = getelementptr &([1000 * i32]* %316)[i64 0][i32 %4]; + i32 %318 = load i32* %317; + [1000 * i32]* %319 = getelementptr &([1000 * i32]* %9)[i64 62]; + i32* %320 = getelementptr &([1000 * i32]* %319)[i64 0][i32 %4]; + store i32* %320 with i32 %318; + [1000 * i32]* %321 = getelementptr &([1000 * i32]* %6)[i64 63]; i32* %322 = getelementptr &([1000 * i32]* %321)[i64 0][i32 %4]; i32 %323 = load i32* %322; - i32* %324 = getelementptr &(i32* %10)[i64 78]; - store i32* %324 with i32 %323; - [1000 * i32]* %325 = getelementptr &([1000 * i32]* %7)[i64 79]; - i32* %326 = getelementptr &([1000 * i32]* %325)[i64 0][i32 %4]; - i32 %327 = load i32* %326; - i32* %328 = getelementptr &(i32* %10)[i64 79]; - store i32* %328 with i32 %327; - [1000 * i32]* %329 = getelementptr &([1000 * i32]* %7)[i64 80]; - i32* %330 = getelementptr &([1000 * i32]* %329)[i64 0][i32 %4]; - i32 %331 = load i32* %330; - i32* %332 = getelementptr &(i32* %10)[i64 80]; - store i32* %332 with i32 %331; - [1000 * i32]* %333 = getelementptr &([1000 * i32]* %7)[i64 81]; + [1000 * i32]* %324 = getelementptr &([1000 * i32]* %9)[i64 63]; + i32* %325 = getelementptr &([1000 * i32]* %324)[i64 0][i32 %4]; + store i32* %325 with i32 %323; + i32 %326 = add i32 %5, i32 64; + i1 %327 = icmp slt i32 %326, i32 960; + cbr i1 %327(prob = 0.933333), ^while.body, ^while.body1; + ^while.body1: + [1000 * i32]* %328 = getelementptr &([1000 * i32]* %6)[i64 64]; + i32* %329 = getelementptr &([1000 * i32]* %328)[i64 0][i32 %4]; + i32 %330 = load i32* %329; + [1000 * i32]* %331 = getelementptr &([1000 * i32]* %9)[i64 64]; + i32* %332 = getelementptr &([1000 * i32]* %331)[i64 0][i32 %4]; + store i32* %332 with i32 %330; + [1000 * i32]* %333 = getelementptr &([1000 * i32]* %6)[i64 65]; i32* %334 = getelementptr &([1000 * i32]* %333)[i64 0][i32 %4]; i32 %335 = load i32* %334; - i32* %336 = getelementptr &(i32* %10)[i64 81]; - store i32* %336 with i32 %335; - [1000 * i32]* %337 = getelementptr &([1000 * i32]* %7)[i64 82]; - i32* %338 = getelementptr &([1000 * i32]* %337)[i64 0][i32 %4]; - i32 %339 = load i32* %338; - i32* %340 = getelementptr &(i32* %10)[i64 82]; - store i32* %340 with i32 %339; - [1000 * i32]* %341 = getelementptr &([1000 * i32]* %7)[i64 83]; + [1000 * i32]* %336 = getelementptr &([1000 * i32]* %9)[i64 65]; + i32* %337 = getelementptr &([1000 * i32]* %336)[i64 0][i32 %4]; + store i32* %337 with i32 %335; + [1000 * i32]* %338 = getelementptr &([1000 * i32]* %6)[i64 66]; + i32* %339 = getelementptr &([1000 * i32]* %338)[i64 0][i32 %4]; + i32 %340 = load i32* %339; + [1000 * i32]* %341 = getelementptr &([1000 * i32]* %9)[i64 66]; i32* %342 = getelementptr &([1000 * i32]* %341)[i64 0][i32 %4]; - i32 %343 = load i32* %342; - i32* %344 = getelementptr &(i32* %10)[i64 83]; - store i32* %344 with i32 %343; - [1000 * i32]* %345 = getelementptr &([1000 * i32]* %7)[i64 84]; - i32* %346 = getelementptr &([1000 * i32]* %345)[i64 0][i32 %4]; - i32 %347 = load i32* %346; - i32* %348 = getelementptr &(i32* %10)[i64 84]; - store i32* %348 with i32 %347; - [1000 * i32]* %349 = getelementptr &([1000 * i32]* %7)[i64 85]; - i32* %350 = getelementptr &([1000 * i32]* %349)[i64 0][i32 %4]; - i32 %351 = load i32* %350; - i32* %352 = getelementptr &(i32* %10)[i64 85]; - store i32* %352 with i32 %351; - [1000 * i32]* %353 = getelementptr &([1000 * i32]* %7)[i64 86]; + store i32* %342 with i32 %340; + [1000 * i32]* %343 = getelementptr &([1000 * i32]* %6)[i64 67]; + i32* %344 = getelementptr &([1000 * i32]* %343)[i64 0][i32 %4]; + i32 %345 = load i32* %344; + [1000 * i32]* %346 = getelementptr &([1000 * i32]* %9)[i64 67]; + i32* %347 = getelementptr &([1000 * i32]* %346)[i64 0][i32 %4]; + store i32* %347 with i32 %345; + [1000 * i32]* %348 = getelementptr &([1000 * i32]* %6)[i64 68]; + i32* %349 = getelementptr &([1000 * i32]* %348)[i64 0][i32 %4]; + i32 %350 = load i32* %349; + [1000 * i32]* %351 = getelementptr &([1000 * i32]* %9)[i64 68]; + i32* %352 = getelementptr &([1000 * i32]* %351)[i64 0][i32 %4]; + store i32* %352 with i32 %350; + [1000 * i32]* %353 = getelementptr &([1000 * i32]* %6)[i64 69]; i32* %354 = getelementptr &([1000 * i32]* %353)[i64 0][i32 %4]; i32 %355 = load i32* %354; - i32* %356 = getelementptr &(i32* %10)[i64 86]; - store i32* %356 with i32 %355; - [1000 * i32]* %357 = getelementptr &([1000 * i32]* %7)[i64 87]; - i32* %358 = getelementptr &([1000 * i32]* %357)[i64 0][i32 %4]; - i32 %359 = load i32* %358; - i32* %360 = getelementptr &(i32* %10)[i64 87]; - store i32* %360 with i32 %359; - [1000 * i32]* %361 = getelementptr &([1000 * i32]* %7)[i64 88]; + [1000 * i32]* %356 = getelementptr &([1000 * i32]* %9)[i64 69]; + i32* %357 = getelementptr &([1000 * i32]* %356)[i64 0][i32 %4]; + store i32* %357 with i32 %355; + [1000 * i32]* %358 = getelementptr &([1000 * i32]* %6)[i64 70]; + i32* %359 = getelementptr &([1000 * i32]* %358)[i64 0][i32 %4]; + i32 %360 = load i32* %359; + [1000 * i32]* %361 = getelementptr &([1000 * i32]* %9)[i64 70]; i32* %362 = getelementptr &([1000 * i32]* %361)[i64 0][i32 %4]; - i32 %363 = load i32* %362; - i32* %364 = getelementptr &(i32* %10)[i64 88]; - store i32* %364 with i32 %363; - [1000 * i32]* %365 = getelementptr &([1000 * i32]* %7)[i64 89]; - i32* %366 = getelementptr &([1000 * i32]* %365)[i64 0][i32 %4]; - i32 %367 = load i32* %366; - i32* %368 = getelementptr &(i32* %10)[i64 89]; - store i32* %368 with i32 %367; - [1000 * i32]* %369 = getelementptr &([1000 * i32]* %7)[i64 90]; - i32* %370 = getelementptr &([1000 * i32]* %369)[i64 0][i32 %4]; - i32 %371 = load i32* %370; - i32* %372 = getelementptr &(i32* %10)[i64 90]; - store i32* %372 with i32 %371; - [1000 * i32]* %373 = getelementptr &([1000 * i32]* %7)[i64 91]; + store i32* %362 with i32 %360; + [1000 * i32]* %363 = getelementptr &([1000 * i32]* %6)[i64 71]; + i32* %364 = getelementptr &([1000 * i32]* %363)[i64 0][i32 %4]; + i32 %365 = load i32* %364; + [1000 * i32]* %366 = getelementptr &([1000 * i32]* %9)[i64 71]; + i32* %367 = getelementptr &([1000 * i32]* %366)[i64 0][i32 %4]; + store i32* %367 with i32 %365; + [1000 * i32]* %368 = getelementptr &([1000 * i32]* %6)[i64 72]; + i32* %369 = getelementptr &([1000 * i32]* %368)[i64 0][i32 %4]; + i32 %370 = load i32* %369; + [1000 * i32]* %371 = getelementptr &([1000 * i32]* %9)[i64 72]; + i32* %372 = getelementptr &([1000 * i32]* %371)[i64 0][i32 %4]; + store i32* %372 with i32 %370; + [1000 * i32]* %373 = getelementptr &([1000 * i32]* %6)[i64 73]; i32* %374 = getelementptr &([1000 * i32]* %373)[i64 0][i32 %4]; i32 %375 = load i32* %374; - i32* %376 = getelementptr &(i32* %10)[i64 91]; - store i32* %376 with i32 %375; - [1000 * i32]* %377 = getelementptr &([1000 * i32]* %7)[i64 92]; - i32* %378 = getelementptr &([1000 * i32]* %377)[i64 0][i32 %4]; - i32 %379 = load i32* %378; - i32* %380 = getelementptr &(i32* %10)[i64 92]; - store i32* %380 with i32 %379; - [1000 * i32]* %381 = getelementptr &([1000 * i32]* %7)[i64 93]; + [1000 * i32]* %376 = getelementptr &([1000 * i32]* %9)[i64 73]; + i32* %377 = getelementptr &([1000 * i32]* %376)[i64 0][i32 %4]; + store i32* %377 with i32 %375; + [1000 * i32]* %378 = getelementptr &([1000 * i32]* %6)[i64 74]; + i32* %379 = getelementptr &([1000 * i32]* %378)[i64 0][i32 %4]; + i32 %380 = load i32* %379; + [1000 * i32]* %381 = getelementptr &([1000 * i32]* %9)[i64 74]; i32* %382 = getelementptr &([1000 * i32]* %381)[i64 0][i32 %4]; - i32 %383 = load i32* %382; - i32* %384 = getelementptr &(i32* %10)[i64 93]; - store i32* %384 with i32 %383; - [1000 * i32]* %385 = getelementptr &([1000 * i32]* %7)[i64 94]; - i32* %386 = getelementptr &([1000 * i32]* %385)[i64 0][i32 %4]; - i32 %387 = load i32* %386; - i32* %388 = getelementptr &(i32* %10)[i64 94]; - store i32* %388 with i32 %387; - [1000 * i32]* %389 = getelementptr &([1000 * i32]* %7)[i64 95]; - i32* %390 = getelementptr &([1000 * i32]* %389)[i64 0][i32 %4]; - i32 %391 = load i32* %390; - i32* %392 = getelementptr &(i32* %10)[i64 95]; - store i32* %392 with i32 %391; - [1000 * i32]* %393 = getelementptr &([1000 * i32]* %7)[i64 96]; + store i32* %382 with i32 %380; + [1000 * i32]* %383 = getelementptr &([1000 * i32]* %6)[i64 75]; + i32* %384 = getelementptr &([1000 * i32]* %383)[i64 0][i32 %4]; + i32 %385 = load i32* %384; + [1000 * i32]* %386 = getelementptr &([1000 * i32]* %9)[i64 75]; + i32* %387 = getelementptr &([1000 * i32]* %386)[i64 0][i32 %4]; + store i32* %387 with i32 %385; + [1000 * i32]* %388 = getelementptr &([1000 * i32]* %6)[i64 76]; + i32* %389 = getelementptr &([1000 * i32]* %388)[i64 0][i32 %4]; + i32 %390 = load i32* %389; + [1000 * i32]* %391 = getelementptr &([1000 * i32]* %9)[i64 76]; + i32* %392 = getelementptr &([1000 * i32]* %391)[i64 0][i32 %4]; + store i32* %392 with i32 %390; + [1000 * i32]* %393 = getelementptr &([1000 * i32]* %6)[i64 77]; i32* %394 = getelementptr &([1000 * i32]* %393)[i64 0][i32 %4]; i32 %395 = load i32* %394; - i32* %396 = getelementptr &(i32* %10)[i64 96]; - store i32* %396 with i32 %395; - [1000 * i32]* %397 = getelementptr &([1000 * i32]* %7)[i64 97]; - i32* %398 = getelementptr &([1000 * i32]* %397)[i64 0][i32 %4]; - i32 %399 = load i32* %398; - i32* %400 = getelementptr &(i32* %10)[i64 97]; - store i32* %400 with i32 %399; - [1000 * i32]* %401 = getelementptr &([1000 * i32]* %7)[i64 98]; + [1000 * i32]* %396 = getelementptr &([1000 * i32]* %9)[i64 77]; + i32* %397 = getelementptr &([1000 * i32]* %396)[i64 0][i32 %4]; + store i32* %397 with i32 %395; + [1000 * i32]* %398 = getelementptr &([1000 * i32]* %6)[i64 78]; + i32* %399 = getelementptr &([1000 * i32]* %398)[i64 0][i32 %4]; + i32 %400 = load i32* %399; + [1000 * i32]* %401 = getelementptr &([1000 * i32]* %9)[i64 78]; i32* %402 = getelementptr &([1000 * i32]* %401)[i64 0][i32 %4]; - i32 %403 = load i32* %402; - i32* %404 = getelementptr &(i32* %10)[i64 98]; - store i32* %404 with i32 %403; - [1000 * i32]* %405 = getelementptr &([1000 * i32]* %7)[i64 99]; - i32* %406 = getelementptr &([1000 * i32]* %405)[i64 0][i32 %4]; - i32 %407 = load i32* %406; - i32* %408 = getelementptr &(i32* %10)[i64 99]; - store i32* %408 with i32 %407; - [1000 * i32]* %409 = getelementptr &([1000 * i32]* %7)[i64 100]; - i32* %410 = getelementptr &([1000 * i32]* %409)[i64 0][i32 %4]; - i32 %411 = load i32* %410; - i32* %412 = getelementptr &(i32* %10)[i64 100]; - store i32* %412 with i32 %411; - [1000 * i32]* %413 = getelementptr &([1000 * i32]* %7)[i64 101]; + store i32* %402 with i32 %400; + [1000 * i32]* %403 = getelementptr &([1000 * i32]* %6)[i64 79]; + i32* %404 = getelementptr &([1000 * i32]* %403)[i64 0][i32 %4]; + i32 %405 = load i32* %404; + [1000 * i32]* %406 = getelementptr &([1000 * i32]* %9)[i64 79]; + i32* %407 = getelementptr &([1000 * i32]* %406)[i64 0][i32 %4]; + store i32* %407 with i32 %405; + [1000 * i32]* %408 = getelementptr &([1000 * i32]* %6)[i64 80]; + i32* %409 = getelementptr &([1000 * i32]* %408)[i64 0][i32 %4]; + i32 %410 = load i32* %409; + [1000 * i32]* %411 = getelementptr &([1000 * i32]* %9)[i64 80]; + i32* %412 = getelementptr &([1000 * i32]* %411)[i64 0][i32 %4]; + store i32* %412 with i32 %410; + [1000 * i32]* %413 = getelementptr &([1000 * i32]* %6)[i64 81]; i32* %414 = getelementptr &([1000 * i32]* %413)[i64 0][i32 %4]; i32 %415 = load i32* %414; - i32* %416 = getelementptr &(i32* %10)[i64 101]; - store i32* %416 with i32 %415; - [1000 * i32]* %417 = getelementptr &([1000 * i32]* %7)[i64 102]; - i32* %418 = getelementptr &([1000 * i32]* %417)[i64 0][i32 %4]; - i32 %419 = load i32* %418; - i32* %420 = getelementptr &(i32* %10)[i64 102]; - store i32* %420 with i32 %419; - [1000 * i32]* %421 = getelementptr &([1000 * i32]* %7)[i64 103]; + [1000 * i32]* %416 = getelementptr &([1000 * i32]* %9)[i64 81]; + i32* %417 = getelementptr &([1000 * i32]* %416)[i64 0][i32 %4]; + store i32* %417 with i32 %415; + [1000 * i32]* %418 = getelementptr &([1000 * i32]* %6)[i64 82]; + i32* %419 = getelementptr &([1000 * i32]* %418)[i64 0][i32 %4]; + i32 %420 = load i32* %419; + [1000 * i32]* %421 = getelementptr &([1000 * i32]* %9)[i64 82]; i32* %422 = getelementptr &([1000 * i32]* %421)[i64 0][i32 %4]; - i32 %423 = load i32* %422; - i32* %424 = getelementptr &(i32* %10)[i64 103]; - store i32* %424 with i32 %423; - i32 %425 = add i32 %4, i32 1; - i1 %426 = icmp sgt i32 %1, i32 %425; - cbr i1 %426(prob = 0.984615), ^b1, ^b2; + store i32* %422 with i32 %420; + [1000 * i32]* %423 = getelementptr &([1000 * i32]* %6)[i64 83]; + i32* %424 = getelementptr &([1000 * i32]* %423)[i64 0][i32 %4]; + i32 %425 = load i32* %424; + [1000 * i32]* %426 = getelementptr &([1000 * i32]* %9)[i64 83]; + i32* %427 = getelementptr &([1000 * i32]* %426)[i64 0][i32 %4]; + store i32* %427 with i32 %425; + [1000 * i32]* %428 = getelementptr &([1000 * i32]* %6)[i64 84]; + i32* %429 = getelementptr &([1000 * i32]* %428)[i64 0][i32 %4]; + i32 %430 = load i32* %429; + [1000 * i32]* %431 = getelementptr &([1000 * i32]* %9)[i64 84]; + i32* %432 = getelementptr &([1000 * i32]* %431)[i64 0][i32 %4]; + store i32* %432 with i32 %430; + [1000 * i32]* %433 = getelementptr &([1000 * i32]* %6)[i64 85]; + i32* %434 = getelementptr &([1000 * i32]* %433)[i64 0][i32 %4]; + i32 %435 = load i32* %434; + [1000 * i32]* %436 = getelementptr &([1000 * i32]* %9)[i64 85]; + i32* %437 = getelementptr &([1000 * i32]* %436)[i64 0][i32 %4]; + store i32* %437 with i32 %435; + [1000 * i32]* %438 = getelementptr &([1000 * i32]* %6)[i64 86]; + i32* %439 = getelementptr &([1000 * i32]* %438)[i64 0][i32 %4]; + i32 %440 = load i32* %439; + [1000 * i32]* %441 = getelementptr &([1000 * i32]* %9)[i64 86]; + i32* %442 = getelementptr &([1000 * i32]* %441)[i64 0][i32 %4]; + store i32* %442 with i32 %440; + [1000 * i32]* %443 = getelementptr &([1000 * i32]* %6)[i64 87]; + i32* %444 = getelementptr &([1000 * i32]* %443)[i64 0][i32 %4]; + i32 %445 = load i32* %444; + [1000 * i32]* %446 = getelementptr &([1000 * i32]* %9)[i64 87]; + i32* %447 = getelementptr &([1000 * i32]* %446)[i64 0][i32 %4]; + store i32* %447 with i32 %445; + [1000 * i32]* %448 = getelementptr &([1000 * i32]* %6)[i64 88]; + i32* %449 = getelementptr &([1000 * i32]* %448)[i64 0][i32 %4]; + i32 %450 = load i32* %449; + [1000 * i32]* %451 = getelementptr &([1000 * i32]* %9)[i64 88]; + i32* %452 = getelementptr &([1000 * i32]* %451)[i64 0][i32 %4]; + store i32* %452 with i32 %450; + [1000 * i32]* %453 = getelementptr &([1000 * i32]* %6)[i64 89]; + i32* %454 = getelementptr &([1000 * i32]* %453)[i64 0][i32 %4]; + i32 %455 = load i32* %454; + [1000 * i32]* %456 = getelementptr &([1000 * i32]* %9)[i64 89]; + i32* %457 = getelementptr &([1000 * i32]* %456)[i64 0][i32 %4]; + store i32* %457 with i32 %455; + [1000 * i32]* %458 = getelementptr &([1000 * i32]* %6)[i64 90]; + i32* %459 = getelementptr &([1000 * i32]* %458)[i64 0][i32 %4]; + i32 %460 = load i32* %459; + [1000 * i32]* %461 = getelementptr &([1000 * i32]* %9)[i64 90]; + i32* %462 = getelementptr &([1000 * i32]* %461)[i64 0][i32 %4]; + store i32* %462 with i32 %460; + [1000 * i32]* %463 = getelementptr &([1000 * i32]* %6)[i64 91]; + i32* %464 = getelementptr &([1000 * i32]* %463)[i64 0][i32 %4]; + i32 %465 = load i32* %464; + [1000 * i32]* %466 = getelementptr &([1000 * i32]* %9)[i64 91]; + i32* %467 = getelementptr &([1000 * i32]* %466)[i64 0][i32 %4]; + store i32* %467 with i32 %465; + [1000 * i32]* %468 = getelementptr &([1000 * i32]* %6)[i64 92]; + i32* %469 = getelementptr &([1000 * i32]* %468)[i64 0][i32 %4]; + i32 %470 = load i32* %469; + [1000 * i32]* %471 = getelementptr &([1000 * i32]* %9)[i64 92]; + i32* %472 = getelementptr &([1000 * i32]* %471)[i64 0][i32 %4]; + store i32* %472 with i32 %470; + [1000 * i32]* %473 = getelementptr &([1000 * i32]* %6)[i64 93]; + i32* %474 = getelementptr &([1000 * i32]* %473)[i64 0][i32 %4]; + i32 %475 = load i32* %474; + [1000 * i32]* %476 = getelementptr &([1000 * i32]* %9)[i64 93]; + i32* %477 = getelementptr &([1000 * i32]* %476)[i64 0][i32 %4]; + store i32* %477 with i32 %475; + [1000 * i32]* %478 = getelementptr &([1000 * i32]* %6)[i64 94]; + i32* %479 = getelementptr &([1000 * i32]* %478)[i64 0][i32 %4]; + i32 %480 = load i32* %479; + [1000 * i32]* %481 = getelementptr &([1000 * i32]* %9)[i64 94]; + i32* %482 = getelementptr &([1000 * i32]* %481)[i64 0][i32 %4]; + store i32* %482 with i32 %480; + [1000 * i32]* %483 = getelementptr &([1000 * i32]* %6)[i64 95]; + i32* %484 = getelementptr &([1000 * i32]* %483)[i64 0][i32 %4]; + i32 %485 = load i32* %484; + [1000 * i32]* %486 = getelementptr &([1000 * i32]* %9)[i64 95]; + i32* %487 = getelementptr &([1000 * i32]* %486)[i64 0][i32 %4]; + store i32* %487 with i32 %485; + [1000 * i32]* %488 = getelementptr &([1000 * i32]* %6)[i64 96]; + i32* %489 = getelementptr &([1000 * i32]* %488)[i64 0][i32 %4]; + i32 %490 = load i32* %489; + [1000 * i32]* %491 = getelementptr &([1000 * i32]* %9)[i64 96]; + i32* %492 = getelementptr &([1000 * i32]* %491)[i64 0][i32 %4]; + store i32* %492 with i32 %490; + [1000 * i32]* %493 = getelementptr &([1000 * i32]* %6)[i64 97]; + i32* %494 = getelementptr &([1000 * i32]* %493)[i64 0][i32 %4]; + i32 %495 = load i32* %494; + [1000 * i32]* %496 = getelementptr &([1000 * i32]* %9)[i64 97]; + i32* %497 = getelementptr &([1000 * i32]* %496)[i64 0][i32 %4]; + store i32* %497 with i32 %495; + [1000 * i32]* %498 = getelementptr &([1000 * i32]* %6)[i64 98]; + i32* %499 = getelementptr &([1000 * i32]* %498)[i64 0][i32 %4]; + i32 %500 = load i32* %499; + [1000 * i32]* %501 = getelementptr &([1000 * i32]* %9)[i64 98]; + i32* %502 = getelementptr &([1000 * i32]* %501)[i64 0][i32 %4]; + store i32* %502 with i32 %500; + [1000 * i32]* %503 = getelementptr &([1000 * i32]* %6)[i64 99]; + i32* %504 = getelementptr &([1000 * i32]* %503)[i64 0][i32 %4]; + i32 %505 = load i32* %504; + [1000 * i32]* %506 = getelementptr &([1000 * i32]* %9)[i64 99]; + i32* %507 = getelementptr &([1000 * i32]* %506)[i64 0][i32 %4]; + store i32* %507 with i32 %505; + [1000 * i32]* %508 = getelementptr &([1000 * i32]* %6)[i64 100]; + i32* %509 = getelementptr &([1000 * i32]* %508)[i64 0][i32 %4]; + i32 %510 = load i32* %509; + [1000 * i32]* %511 = getelementptr &([1000 * i32]* %9)[i64 100]; + i32* %512 = getelementptr &([1000 * i32]* %511)[i64 0][i32 %4]; + store i32* %512 with i32 %510; + [1000 * i32]* %513 = getelementptr &([1000 * i32]* %6)[i64 101]; + i32* %514 = getelementptr &([1000 * i32]* %513)[i64 0][i32 %4]; + i32 %515 = load i32* %514; + [1000 * i32]* %516 = getelementptr &([1000 * i32]* %9)[i64 101]; + i32* %517 = getelementptr &([1000 * i32]* %516)[i64 0][i32 %4]; + store i32* %517 with i32 %515; + [1000 * i32]* %518 = getelementptr &([1000 * i32]* %6)[i64 102]; + i32* %519 = getelementptr &([1000 * i32]* %518)[i64 0][i32 %4]; + i32 %520 = load i32* %519; + [1000 * i32]* %521 = getelementptr &([1000 * i32]* %9)[i64 102]; + i32* %522 = getelementptr &([1000 * i32]* %521)[i64 0][i32 %4]; + store i32* %522 with i32 %520; + [1000 * i32]* %523 = getelementptr &([1000 * i32]* %6)[i64 103]; + i32* %524 = getelementptr &([1000 * i32]* %523)[i64 0][i32 %4]; + i32 %525 = load i32* %524; + [1000 * i32]* %526 = getelementptr &([1000 * i32]* %9)[i64 103]; + i32* %527 = getelementptr &([1000 * i32]* %526)[i64 0][i32 %4]; + store i32* %527 with i32 %525; + i32 %528 = add i32 %4, i32 1; + i1 %529 = icmp sgt i32 %1, i32 %528; + cbr i1 %529(prob = 0.984615), ^b1, ^b2; ^b2: ret; } internal func @cmmc_parallel_body_1(i32 %0, i32 %1) -> void { NoRecurse ParallelBody AlignedParallelBody } { ^b: - [1000 * [1000 * i32]]* %2 = ptrcast [1000 * [1000 * i32]]* @b to [1000 * [1000 * i32]]*; - [1000 * [1000 * i32]]* %3 = ptrcast [1000 * [1000 * i32]]* @c to [1000 * [1000 * i32]]*; - [1000 * [1000 * i32]]* %4 = ptrcast [1000 * [1000 * i32]]* @a to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %2 = ptrcast [1000 * [1000 * i32]]* @c to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %3 = ptrcast [1000 * [1000 * i32]]* @a to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %4 = ptrcast [1000 * [1000 * i32]]* @b to [1000 * [1000 * i32]]*; ubr ^b1; ^b1: - i32 %5 = phi [^b, i32 %0] [^b2, i32 %184]; - [1000 * i32]* %6 = getelementptr &([1000 * [1000 * i32]]* %3)[i64 0][i32 %5]; - [1000 * i32]* %7 = getelementptr &([1000 * [1000 * i32]]* %4)[i64 0][i32 %5]; + i32 %5 = phi [^b, i32 %0] [^b2, i32 %161]; + [1000 * i32]* %6 = getelementptr &([1000 * [1000 * i32]]* %2)[i64 0][i32 %5]; + [1000 * i32]* %7 = getelementptr &([1000 * [1000 * i32]]* %3)[i64 0][i32 %5]; ubr ^while.body; ^while.body: - i32 %8 = phi [^b1, i32 0] [^while.body2, i32 %182]; + i32 %8 = phi [^b1, i32 0] [^while.body2, i32 %159]; + [1000 * i32]* %9 = getelementptr &([1000 * [1000 * i32]]* %4)[i64 0][i32 %8]; ubr ^while.body1; ^while.body1: - i32 %9 = phi [^while.body, i32 0] [^while.body1, i32 %123]; - i32 %10 = phi [^while.body, i32 0] [^while.body1, i32 %122]; - [1000 * i32]* %11 = getelementptr &([1000 * [1000 * i32]]* %2)[i64 0][i32 %9]; - i32* %12 = getelementptr &([1000 * i32]* %11)[i64 0][i32 %8]; - i32* %13 = getelementptr &([1000 * i32]* %7)[i64 0][i32 %9]; - i32 %14 = load i32* %13; - i32 %15 = load i32* %12; - [1000 * i32]* %16 = getelementptr &([1000 * i32]* %11)[i64 1]; - i32* %17 = getelementptr &([1000 * i32]* %16)[i64 0][i32 %8]; - i32* %18 = getelementptr &(i32* %13)[i64 1]; + i32 %10 = phi [^while.body, i32 0] [^while.body1, i32 %108]; + i32 %11 = phi [^while.body, i32 0] [^while.body1, i32 %107]; + i32* %12 = getelementptr &([1000 * i32]* %7)[i64 0][i32 %10]; + i32 %13 = load i32* %12; + i32* %14 = getelementptr &([1000 * i32]* %9)[i64 0][i32 %10]; + i32 %15 = load i32* %14; + i32* %16 = getelementptr &(i32* %12)[i64 1]; + i32 %17 = load i32* %16; + i32* %18 = getelementptr &(i32* %14)[i64 1]; i32 %19 = load i32* %18; - i32 %20 = load i32* %17; - i32 %21 = mul i32 %19, i32 %20; - i32 %22 = mul i32 %14, i32 %15; - i32 %23 = add i32 %21, i32 %22; - [1000 * i32]* %24 = getelementptr &([1000 * i32]* %11)[i64 2]; - i32* %25 = getelementptr &([1000 * i32]* %24)[i64 0][i32 %8]; - i32* %26 = getelementptr &(i32* %13)[i64 2]; - i32 %27 = load i32* %26; - i32 %28 = load i32* %25; - i32 %29 = mul i32 %27, i32 %28; - i32 %30 = add i32 %23, i32 %29; - [1000 * i32]* %31 = getelementptr &([1000 * i32]* %11)[i64 3]; - i32* %32 = getelementptr &([1000 * i32]* %31)[i64 0][i32 %8]; - i32* %33 = getelementptr &(i32* %13)[i64 3]; - i32 %34 = load i32* %33; - i32 %35 = load i32* %32; - i32 %36 = mul i32 %34, i32 %35; - i32 %37 = add i32 %30, i32 %36; - [1000 * i32]* %38 = getelementptr &([1000 * i32]* %11)[i64 4]; - i32* %39 = getelementptr &([1000 * i32]* %38)[i64 0][i32 %8]; - i32* %40 = getelementptr &(i32* %13)[i64 4]; - i32 %41 = load i32* %40; - i32 %42 = load i32* %39; - i32 %43 = mul i32 %41, i32 %42; - i32 %44 = add i32 %37, i32 %43; - [1000 * i32]* %45 = getelementptr &([1000 * i32]* %11)[i64 5]; - i32* %46 = getelementptr &([1000 * i32]* %45)[i64 0][i32 %8]; - i32* %47 = getelementptr &(i32* %13)[i64 5]; + i32 %20 = mul i32 %17, i32 %19; + i32 %21 = mul i32 %13, i32 %15; + i32 %22 = add i32 %20, i32 %21; + i32* %23 = getelementptr &(i32* %12)[i64 2]; + i32 %24 = load i32* %23; + i32* %25 = getelementptr &(i32* %14)[i64 2]; + i32 %26 = load i32* %25; + i32 %27 = mul i32 %24, i32 %26; + i32 %28 = add i32 %22, i32 %27; + i32* %29 = getelementptr &(i32* %12)[i64 3]; + i32 %30 = load i32* %29; + i32* %31 = getelementptr &(i32* %14)[i64 3]; + i32 %32 = load i32* %31; + i32 %33 = mul i32 %30, i32 %32; + i32 %34 = add i32 %28, i32 %33; + i32* %35 = getelementptr &(i32* %12)[i64 4]; + i32 %36 = load i32* %35; + i32* %37 = getelementptr &(i32* %14)[i64 4]; + i32 %38 = load i32* %37; + i32 %39 = mul i32 %36, i32 %38; + i32 %40 = add i32 %34, i32 %39; + i32* %41 = getelementptr &(i32* %12)[i64 5]; + i32 %42 = load i32* %41; + i32* %43 = getelementptr &(i32* %14)[i64 5]; + i32 %44 = load i32* %43; + i32 %45 = mul i32 %42, i32 %44; + i32 %46 = add i32 %40, i32 %45; + i32* %47 = getelementptr &(i32* %12)[i64 6]; i32 %48 = load i32* %47; - i32 %49 = load i32* %46; - i32 %50 = mul i32 %48, i32 %49; - i32 %51 = add i32 %44, i32 %50; - [1000 * i32]* %52 = getelementptr &([1000 * i32]* %11)[i64 6]; - i32* %53 = getelementptr &([1000 * i32]* %52)[i64 0][i32 %8]; - i32* %54 = getelementptr &(i32* %13)[i64 6]; - i32 %55 = load i32* %54; - i32 %56 = load i32* %53; - i32 %57 = mul i32 %55, i32 %56; - i32 %58 = add i32 %51, i32 %57; - [1000 * i32]* %59 = getelementptr &([1000 * i32]* %11)[i64 7]; - i32* %60 = getelementptr &([1000 * i32]* %59)[i64 0][i32 %8]; - i32* %61 = getelementptr &(i32* %13)[i64 7]; + i32* %49 = getelementptr &(i32* %14)[i64 6]; + i32 %50 = load i32* %49; + i32 %51 = mul i32 %48, i32 %50; + i32 %52 = add i32 %46, i32 %51; + i32* %53 = getelementptr &(i32* %12)[i64 7]; + i32 %54 = load i32* %53; + i32* %55 = getelementptr &(i32* %14)[i64 7]; + i32 %56 = load i32* %55; + i32 %57 = mul i32 %54, i32 %56; + i32 %58 = add i32 %52, i32 %57; + i32* %59 = getelementptr &(i32* %12)[i64 8]; + i32 %60 = load i32* %59; + i32* %61 = getelementptr &(i32* %14)[i64 8]; i32 %62 = load i32* %61; - i32 %63 = load i32* %60; - i32 %64 = mul i32 %62, i32 %63; - i32 %65 = add i32 %58, i32 %64; - [1000 * i32]* %66 = getelementptr &([1000 * i32]* %11)[i64 8]; - i32* %67 = getelementptr &([1000 * i32]* %66)[i64 0][i32 %8]; - i32* %68 = getelementptr &(i32* %13)[i64 8]; - i32 %69 = load i32* %68; - i32 %70 = load i32* %67; - i32 %71 = mul i32 %69, i32 %70; - i32 %72 = add i32 %65, i32 %71; - [1000 * i32]* %73 = getelementptr &([1000 * i32]* %11)[i64 9]; - i32* %74 = getelementptr &([1000 * i32]* %73)[i64 0][i32 %8]; - i32* %75 = getelementptr &(i32* %13)[i64 9]; - i32 %76 = load i32* %75; - i32 %77 = load i32* %74; - i32 %78 = mul i32 %76, i32 %77; - i32 %79 = add i32 %72, i32 %78; - [1000 * i32]* %80 = getelementptr &([1000 * i32]* %11)[i64 10]; - i32* %81 = getelementptr &([1000 * i32]* %80)[i64 0][i32 %8]; - i32* %82 = getelementptr &(i32* %13)[i64 10]; - i32 %83 = load i32* %82; - i32 %84 = load i32* %81; - i32 %85 = mul i32 %83, i32 %84; - i32 %86 = add i32 %79, i32 %85; - [1000 * i32]* %87 = getelementptr &([1000 * i32]* %11)[i64 11]; - i32* %88 = getelementptr &([1000 * i32]* %87)[i64 0][i32 %8]; - i32* %89 = getelementptr &(i32* %13)[i64 11]; + i32 %63 = mul i32 %60, i32 %62; + i32 %64 = add i32 %58, i32 %63; + i32* %65 = getelementptr &(i32* %12)[i64 9]; + i32 %66 = load i32* %65; + i32* %67 = getelementptr &(i32* %14)[i64 9]; + i32 %68 = load i32* %67; + i32 %69 = mul i32 %66, i32 %68; + i32 %70 = add i32 %64, i32 %69; + i32* %71 = getelementptr &(i32* %12)[i64 10]; + i32 %72 = load i32* %71; + i32* %73 = getelementptr &(i32* %14)[i64 10]; + i32 %74 = load i32* %73; + i32 %75 = mul i32 %72, i32 %74; + i32 %76 = add i32 %70, i32 %75; + i32* %77 = getelementptr &(i32* %12)[i64 11]; + i32 %78 = load i32* %77; + i32* %79 = getelementptr &(i32* %14)[i64 11]; + i32 %80 = load i32* %79; + i32 %81 = mul i32 %78, i32 %80; + i32 %82 = add i32 %76, i32 %81; + i32* %83 = getelementptr &(i32* %12)[i64 12]; + i32 %84 = load i32* %83; + i32* %85 = getelementptr &(i32* %14)[i64 12]; + i32 %86 = load i32* %85; + i32 %87 = mul i32 %84, i32 %86; + i32 %88 = add i32 %82, i32 %87; + i32* %89 = getelementptr &(i32* %12)[i64 13]; i32 %90 = load i32* %89; - i32 %91 = load i32* %88; - i32 %92 = mul i32 %90, i32 %91; - i32 %93 = add i32 %86, i32 %92; - [1000 * i32]* %94 = getelementptr &([1000 * i32]* %11)[i64 12]; - i32* %95 = getelementptr &([1000 * i32]* %94)[i64 0][i32 %8]; - i32* %96 = getelementptr &(i32* %13)[i64 12]; - i32 %97 = load i32* %96; - i32 %98 = load i32* %95; - i32 %99 = mul i32 %97, i32 %98; - i32 %100 = add i32 %93, i32 %99; - [1000 * i32]* %101 = getelementptr &([1000 * i32]* %11)[i64 13]; - i32* %102 = getelementptr &([1000 * i32]* %101)[i64 0][i32 %8]; - i32* %103 = getelementptr &(i32* %13)[i64 13]; + i32* %91 = getelementptr &(i32* %14)[i64 13]; + i32 %92 = load i32* %91; + i32 %93 = mul i32 %90, i32 %92; + i32 %94 = add i32 %88, i32 %93; + i32* %95 = getelementptr &(i32* %12)[i64 14]; + i32 %96 = load i32* %95; + i32* %97 = getelementptr &(i32* %14)[i64 14]; + i32 %98 = load i32* %97; + i32 %99 = mul i32 %96, i32 %98; + i32 %100 = add i32 %94, i32 %99; + i32* %101 = getelementptr &(i32* %12)[i64 15]; + i32 %102 = load i32* %101; + i32* %103 = getelementptr &(i32* %14)[i64 15]; i32 %104 = load i32* %103; - i32 %105 = load i32* %102; - i32 %106 = mul i32 %104, i32 %105; - i32 %107 = add i32 %100, i32 %106; - [1000 * i32]* %108 = getelementptr &([1000 * i32]* %11)[i64 14]; - i32* %109 = getelementptr &([1000 * i32]* %108)[i64 0][i32 %8]; - i32* %110 = getelementptr &(i32* %13)[i64 14]; - i32 %111 = load i32* %110; - i32 %112 = load i32* %109; - i32 %113 = mul i32 %111, i32 %112; - i32 %114 = add i32 %107, i32 %113; - [1000 * i32]* %115 = getelementptr &([1000 * i32]* %11)[i64 15]; - i32* %116 = getelementptr &([1000 * i32]* %115)[i64 0][i32 %8]; - i32* %117 = getelementptr &(i32* %13)[i64 15]; - i32 %118 = load i32* %117; - i32 %119 = load i32* %116; - i32 %120 = mul i32 %118, i32 %119; - i32 %121 = add i32 %114, i32 %120; - i32 %122 = add i32 %10, i32 %121; - i32 %123 = add i32 %9, i32 16; - i1 %124 = icmp slt i32 %123, i32 992; - cbr i1 %124(prob = 0.983871), ^while.body1, ^while.body2; + i32 %105 = mul i32 %102, i32 %104; + i32 %106 = add i32 %100, i32 %105; + i32 %107 = add i32 %11, i32 %106; + i32 %108 = add i32 %10, i32 16; + i1 %109 = icmp slt i32 %108, i32 992; + cbr i1 %109(prob = 0.983871), ^while.body1, ^while.body2; ^while.body2: - [1000 * i32]* %125 = getelementptr &([1000 * i32]* %11)[i64 16]; - i32* %126 = getelementptr &([1000 * i32]* %125)[i64 0][i32 %8]; - i32* %127 = getelementptr &(i32* %13)[i64 16]; - i32 %128 = load i32* %127; - i32 %129 = load i32* %126; - i32 %130 = mul i32 %128, i32 %129; - i32 %131 = add i32 %122, i32 %130; - [1000 * i32]* %132 = getelementptr &([1000 * i32]* %11)[i64 17]; - i32* %133 = getelementptr &([1000 * i32]* %132)[i64 0][i32 %8]; - i32* %134 = getelementptr &(i32* %13)[i64 17]; + i32* %110 = getelementptr &(i32* %12)[i64 16]; + i32 %111 = load i32* %110; + i32* %112 = getelementptr &(i32* %14)[i64 16]; + i32 %113 = load i32* %112; + i32 %114 = mul i32 %111, i32 %113; + i32 %115 = add i32 %107, i32 %114; + i32* %116 = getelementptr &(i32* %12)[i64 17]; + i32 %117 = load i32* %116; + i32* %118 = getelementptr &(i32* %14)[i64 17]; + i32 %119 = load i32* %118; + i32 %120 = mul i32 %117, i32 %119; + i32 %121 = add i32 %115, i32 %120; + i32* %122 = getelementptr &(i32* %12)[i64 18]; + i32 %123 = load i32* %122; + i32* %124 = getelementptr &(i32* %14)[i64 18]; + i32 %125 = load i32* %124; + i32 %126 = mul i32 %123, i32 %125; + i32 %127 = add i32 %121, i32 %126; + i32* %128 = getelementptr &(i32* %12)[i64 19]; + i32 %129 = load i32* %128; + i32* %130 = getelementptr &(i32* %14)[i64 19]; + i32 %131 = load i32* %130; + i32 %132 = mul i32 %129, i32 %131; + i32 %133 = add i32 %127, i32 %132; + i32* %134 = getelementptr &(i32* %12)[i64 20]; i32 %135 = load i32* %134; - i32 %136 = load i32* %133; - i32 %137 = mul i32 %135, i32 %136; - i32 %138 = add i32 %131, i32 %137; - [1000 * i32]* %139 = getelementptr &([1000 * i32]* %11)[i64 18]; - i32* %140 = getelementptr &([1000 * i32]* %139)[i64 0][i32 %8]; - i32* %141 = getelementptr &(i32* %13)[i64 18]; - i32 %142 = load i32* %141; - i32 %143 = load i32* %140; - i32 %144 = mul i32 %142, i32 %143; - i32 %145 = add i32 %138, i32 %144; - [1000 * i32]* %146 = getelementptr &([1000 * i32]* %11)[i64 19]; - i32* %147 = getelementptr &([1000 * i32]* %146)[i64 0][i32 %8]; - i32* %148 = getelementptr &(i32* %13)[i64 19]; + i32* %136 = getelementptr &(i32* %14)[i64 20]; + i32 %137 = load i32* %136; + i32 %138 = mul i32 %135, i32 %137; + i32 %139 = add i32 %133, i32 %138; + i32* %140 = getelementptr &(i32* %12)[i64 21]; + i32 %141 = load i32* %140; + i32* %142 = getelementptr &(i32* %14)[i64 21]; + i32 %143 = load i32* %142; + i32 %144 = mul i32 %141, i32 %143; + i32 %145 = add i32 %139, i32 %144; + i32* %146 = getelementptr &(i32* %12)[i64 22]; + i32 %147 = load i32* %146; + i32* %148 = getelementptr &(i32* %14)[i64 22]; i32 %149 = load i32* %148; - i32 %150 = load i32* %147; - i32 %151 = mul i32 %149, i32 %150; - i32 %152 = add i32 %145, i32 %151; - [1000 * i32]* %153 = getelementptr &([1000 * i32]* %11)[i64 20]; - i32* %154 = getelementptr &([1000 * i32]* %153)[i64 0][i32 %8]; - i32* %155 = getelementptr &(i32* %13)[i64 20]; - i32 %156 = load i32* %155; - i32 %157 = load i32* %154; - i32 %158 = mul i32 %156, i32 %157; - i32 %159 = add i32 %152, i32 %158; - [1000 * i32]* %160 = getelementptr &([1000 * i32]* %11)[i64 21]; - i32* %161 = getelementptr &([1000 * i32]* %160)[i64 0][i32 %8]; - i32* %162 = getelementptr &(i32* %13)[i64 21]; - i32 %163 = load i32* %162; - i32 %164 = load i32* %161; - i32 %165 = mul i32 %163, i32 %164; - i32 %166 = add i32 %159, i32 %165; - [1000 * i32]* %167 = getelementptr &([1000 * i32]* %11)[i64 22]; - i32* %168 = getelementptr &([1000 * i32]* %167)[i64 0][i32 %8]; - i32* %169 = getelementptr &(i32* %13)[i64 22]; - i32 %170 = load i32* %169; - i32 %171 = load i32* %168; - i32 %172 = mul i32 %170, i32 %171; - i32 %173 = add i32 %166, i32 %172; - [1000 * i32]* %174 = getelementptr &([1000 * i32]* %11)[i64 23]; - i32* %175 = getelementptr &([1000 * i32]* %174)[i64 0][i32 %8]; - i32* %176 = getelementptr &(i32* %13)[i64 23]; - i32 %177 = load i32* %176; - i32 %178 = load i32* %175; - i32 %179 = mul i32 %177, i32 %178; - i32 %180 = add i32 %173, i32 %179; - i32* %181 = getelementptr &([1000 * i32]* %6)[i64 0][i32 %8]; - store i32* %181 with i32 %180; - i32 %182 = add i32 %8, i32 1; - i1 %183 = icmp slt i32 %182, i32 1000; - cbr i1 %183(prob = 0.999), ^while.body, ^b2; + i32 %150 = mul i32 %147, i32 %149; + i32 %151 = add i32 %145, i32 %150; + i32* %152 = getelementptr &(i32* %12)[i64 23]; + i32 %153 = load i32* %152; + i32* %154 = getelementptr &(i32* %14)[i64 23]; + i32 %155 = load i32* %154; + i32 %156 = mul i32 %153, i32 %155; + i32 %157 = add i32 %151, i32 %156; + i32* %158 = getelementptr &([1000 * i32]* %6)[i64 0][i32 %8]; + store i32* %158 with i32 %157; + i32 %159 = add i32 %8, i32 1; + i1 %160 = icmp slt i32 %159, i32 1000; + cbr i1 %160(prob = 0.999), ^while.body, ^b2; ^b2: - i32 %184 = add i32 %5, i32 1; - i1 %185 = icmp sgt i32 %1, i32 %184; - cbr i1 %185(prob = 0.984615), ^b1, ^b3; + i32 %161 = add i32 %5, i32 1; + i1 %162 = icmp sgt i32 %1, i32 %161; + cbr i1 %162(prob = 0.984615), ^b1, ^b3; ^b3: ret; } diff --git a/tests/SysY2022/performance/matmul3.riscv.s b/tests/SysY2022/performance/matmul3.riscv.s index cbbefb151..301b801f7 100644 --- a/tests/SysY2022/performance/matmul3.riscv.s +++ b/tests/SysY2022/performance/matmul3.riscv.s @@ -21,340 +21,355 @@ cmmc_parallel_body_payload_3: .globl main main: addi sp, sp, -88 -pcrel2037: +pcrel2255: auipc a0, %pcrel_hi(a) -pcrel2038: - auipc a1, %pcrel_hi(cmmc_parallel_body_2) +pcrel2256: + auipc a1, %pcrel_hi(cmmc_parallel_body_3) sd ra, 0(sp) sd s5, 8(sp) - addi s5, a0, %pcrel_lo(pcrel2037) + addi s5, a0, %pcrel_lo(pcrel2255) sd s0, 16(sp) -pcrel2039: +pcrel2257: auipc a0, %pcrel_hi(c) sd s7, 24(sp) -pcrel2040: +pcrel2258: auipc s7, %pcrel_hi(cmmc_parallel_body_payload_2) sd s8, 32(sp) - addi s8, s7, %pcrel_lo(pcrel2040) + addi s8, s7, %pcrel_lo(pcrel2258) sd s1, 40(sp) - addi s1, a0, %pcrel_lo(pcrel2039) + addi s1, a0, %pcrel_lo(pcrel2257) sd s6, 48(sp) -pcrel2041: - auipc a0, %pcrel_hi(cmmc_parallel_body_3) - addi s6, a1, %pcrel_lo(pcrel2038) +pcrel2259: + auipc a0, %pcrel_hi(cmmc_parallel_body_2) sd s9, 56(sp) + addi s6, a0, %pcrel_lo(pcrel2259) mv s9, zero + li a0, 125 sd s2, 64(sp) - addi s2, a0, %pcrel_lo(pcrel2041) + slli s0, a0, 5 + addi s2, a1, %pcrel_lo(pcrel2256) sd s3, 72(sp) - li a0, 125 + slli s3, s0, 1 sd s4, 80(sp) - slli s0, a0, 5 sh1add s4, s0, s0 - slli s3, s0, 1 -label1349: +label1566: li a0, 1000 - bge s9, a0, label1356 + bge s9, a0, label1571 mv a0, s5 jal getarray li a1, 1000 - bne a0, a1, label1354 + beq a0, a1, label1570 +label1599: + ld ra, 0(sp) + ld s5, 8(sp) + ld s0, 16(sp) + ld s7, 24(sp) + ld s8, 32(sp) + ld s1, 40(sp) + ld s6, 48(sp) + ld s9, 56(sp) + ld s2, 64(sp) + ld s3, 72(sp) + ld s4, 80(sp) + addi sp, sp, 88 + ret +label1570: addiw s9, s9, 1 add s5, s5, s0 - j label1349 -label1356: + j label1566 +label1571: li a0, 23 jal _sysy_starttime li a1, 1000 mv a0, zero -pcrel2042: +pcrel2260: auipc a3, %pcrel_hi(cmmc_parallel_body_0) - addi a2, a3, %pcrel_lo(pcrel2042) + addi a2, a3, %pcrel_lo(pcrel2260) jal cmmcParallelFor li a1, 1000 mv a0, zero -pcrel2043: +pcrel2261: auipc a3, %pcrel_hi(cmmc_parallel_body_1) - addi a2, a3, %pcrel_lo(pcrel2043) + addi a2, a3, %pcrel_lo(pcrel2261) jal cmmcParallelFor mv s9, zero mv s5, s1 mv a0, s1 mv a1, zero - lui a3, 524288 - addiw a2, a3, -1 - j label1360 + lui a4, 524288 + addiw a2, a4, -1 + j label1575 .p2align 2 -label1383: +label1598: addi a0, a0, 256 .p2align 2 -label1360: - ld a4, 0(a0) +label1575: + ld a3, 0(a0) addiw a1, a1, 64 - srai t1, a4, 32 - sext.w a3, a4 - min t0, a2, a3 - ld a3, 8(a0) - min a5, t0, t1 - srai t2, a3, 32 - sext.w a2, a3 - min t0, a5, a2 - ld a2, 16(a0) - min a4, t0, t2 - sext.w t1, a2 - srai t2, a2, 32 - min a5, a4, t1 - ld a4, 24(a0) - min a3, a5, t2 - srai t1, a4, 32 - sext.w t0, a4 - min a5, a3, t0 - ld a3, 32(a0) - min a2, a5, t1 - srai t0, a3, 32 - sext.w t2, a3 - min a5, a2, t2 - ld a2, 40(a0) - min a4, a5, t0 - srai t1, a2, 32 + srai t1, a3, 32 + sext.w t0, a3 + min a5, a2, t0 + ld a2, 8(a0) + min a4, a5, t1 sext.w a3, a2 + srai t1, a2, 32 min t0, a4, a3 - ld a3, 48(a0) + ld a3, 16(a0) min a5, t0, t1 srai t2, a3, 32 sext.w a4, a3 min t0, a5, a4 - ld a4, 56(a0) + ld a4, 24(a0) min a2, t0, t2 sext.w t1, a4 srai t0, a4, 32 min a5, a2, t1 - ld a2, 64(a0) + ld a2, 32(a0) min a3, a5, t0 sext.w t1, a2 srai t0, a2, 32 min a5, a3, t1 - ld a3, 72(a0) + ld a3, 40(a0) min a4, a5, t0 srai t1, a3, 32 sext.w a2, a3 min t0, a4, a2 - ld a2, 80(a0) + ld a2, 48(a0) min a5, t0, t1 sext.w a3, a2 srai t1, a2, 32 min t0, a5, a3 - ld a3, 88(a0) + ld a3, 56(a0) min a4, t0, t1 sext.w a2, a3 srai t1, a3, 32 - min a5, a4, a2 - ld a2, 96(a0) - min t0, a5, t1 + min t0, a4, a2 + ld a2, 64(a0) + min a5, t0, t1 sext.w a3, a2 srai t1, a2, 32 - min a4, t0, a3 - ld a3, 104(a0) - min a5, a4, t1 - sext.w a2, a3 - srai t1, a3, 32 - min t0, a5, a2 - ld a2, 112(a0) + min t0, a5, a3 + ld a3, 72(a0) min a4, t0, t1 - sext.w a3, a2 - srai t1, a2, 32 - min t0, a4, a3 - ld a3, 120(a0) - min a5, t0, t1 sext.w a2, a3 srai t1, a3, 32 - min t0, a5, a2 - ld a2, 128(a0) - min a4, t0, t1 - sext.w a3, a2 - srai t1, a2, 32 - min t0, a4, a3 - ld a3, 136(a0) + min t0, a4, a2 + ld a2, 80(a0) min a5, t0, t1 - sext.w a2, a3 - srai t1, a3, 32 - min a4, a5, a2 - ld a2, 144(a0) - min t0, a4, t1 + srai t2, a2, 32 sext.w a3, a2 - srai t1, a2, 32 - min a5, t0, a3 - ld a3, 152(a0) - min a4, a5, t1 - sext.w a2, a3 + min t0, a5, a3 + ld a3, 88(a0) + min a4, t0, t2 + sext.w t1, a3 + srai t0, a3, 32 + min a5, a4, t1 + ld a4, 96(a0) + min a2, a5, t0 + sext.w t1, a4 + srai t0, a4, 32 + min a5, a2, t1 + ld a2, 104(a0) + min a3, a5, t0 + sext.w t1, a2 + srai t0, a2, 32 + min a5, a3, t1 + ld a3, 112(a0) + min a4, a5, t0 srai t1, a3, 32 + sext.w a2, a3 min t0, a4, a2 - ld a2, 160(a0) + ld a2, 120(a0) min a5, t0, t1 sext.w a3, a2 srai t1, a2, 32 min t0, a5, a3 - ld a3, 168(a0) + ld a3, 128(a0) min a4, t0, t1 sext.w a2, a3 srai t1, a3, 32 min t0, a4, a2 - ld a2, 176(a0) + ld a2, 136(a0) + min a5, t0, t1 + srai t2, a2, 32 + sext.w a4, a2 + min t0, a5, a4 + ld a4, 144(a0) + min a3, t0, t2 + sext.w t1, a4 + srai t0, a4, 32 + min a5, a3, t1 + ld a3, 152(a0) + min a2, a5, t0 + sext.w t1, a3 + srai t0, a3, 32 + min a5, a2, t1 + ld a2, 160(a0) + min a4, a5, t0 + srai t1, a2, 32 + sext.w a3, a2 + min t0, a4, a3 + ld a3, 168(a0) min a5, t0, t1 + sext.w a2, a3 + srai t1, a3, 32 + min a4, a5, a2 + ld a2, 176(a0) + min t0, a4, t1 sext.w a3, a2 srai t1, a2, 32 - min a4, a5, a3 + min a5, t0, a3 ld a3, 184(a0) - min t0, a4, t1 + min a4, a5, t1 sext.w a2, a3 srai t1, a3, 32 - min a5, t0, a2 + min t0, a4, a2 ld a2, 192(a0) - min a4, a5, t1 - sext.w t0, a2 + min a5, t0, t1 + sext.w a4, a2 srai t1, a2, 32 - min a5, a4, t0 + min t0, a5, a4 ld a4, 200(a0) - min a3, a5, t1 - sext.w t0, a4 + min a3, t0, t1 + sext.w t2, a4 + srai t0, a4, 32 + min a5, a3, t2 + ld a3, 208(a0) + min a2, a5, t0 + sext.w t1, a3 + srai t0, a3, 32 + min a5, a2, t1 + ld a2, 216(a0) + min a4, a5, t0 + srai t2, a2, 32 + sext.w t1, a2 + min a5, a4, t1 + ld a4, 224(a0) + min a3, a5, t2 srai t1, a4, 32 + sext.w t0, a4 min a5, a3, t0 - ld a3, 208(a0) + ld a3, 232(a0) min a2, a5, t1 sext.w t0, a3 srai t1, a3, 32 - min a5, a2, t0 - ld a2, 216(a0) - min a4, a5, t1 - sext.w a3, a2 - srai t1, a2, 32 - min t0, a4, a3 - ld a3, 224(a0) - min a5, t0, t1 - sext.w a2, a3 - srai t1, a3, 32 - min a4, a5, a2 - ld a2, 232(a0) - min t0, a4, t1 + min a4, a2, t0 + ld a2, 240(a0) + min a5, a4, t1 sext.w a3, a2 srai t1, a2, 32 - min a5, t0, a3 - ld a3, 240(a0) - min a4, a5, t1 - srai t0, a3, 32 + min t0, a5, a3 + ld a3, 248(a0) + min a4, t0, t1 sext.w t2, a3 - min a2, a4, t2 - ld a4, 248(a0) - min a5, a2, t0 - sext.w t1, a4 - srai t0, a4, 32 - min a3, a5, t1 - li a4, 960 - min a2, a3, t0 - blt a1, a4, label1383 - ld a3, 256(a0) srai t0, a3, 32 - sext.w a1, a3 - min a5, a2, a1 - ld a1, 264(a0) - min a4, a5, t0 - sext.w a2, a1 - srai t0, a1, 32 - min a5, a4, a2 - ld a2, 272(a0) - min a3, a5, t0 - sext.w a1, a2 - srai t0, a2, 32 - min a5, a3, a1 - ld a1, 280(a0) - min a4, a5, t0 - sext.w a3, a1 - srai t0, a1, 32 - min a5, a4, a3 - ld a3, 288(a0) + min a5, a4, t2 + li a3, 960 min a2, a5, t0 - sext.w t1, a3 - srai a5, a3, 32 - min a4, a2, t1 - ld a2, 296(a0) - min a1, a4, a5 - sext.w t0, a2 - srai a5, a2, 32 - min a4, a1, t0 - ld a1, 304(a0) - min a3, a4, a5 + blt a1, a3, label1598 + ld a1, 256(a0) srai t0, a1, 32 - sext.w a2, a1 - min a5, a3, a2 - ld a2, 312(a0) - min a4, a5, t0 + sext.w a5, a1 + min a3, a2, a5 + ld a2, 264(a0) + min a4, a3, t0 sext.w a1, a2 srai t0, a2, 32 min a5, a4, a1 - ld a1, 320(a0) + ld a1, 272(a0) min a3, a5, t0 sext.w a2, a1 srai t0, a1, 32 min a5, a3, a2 - ld a2, 328(a0) + ld a2, 280(a0) min a4, a5, t0 - srai t1, a2, 32 - sext.w a3, a2 - min a5, a4, a3 - ld a3, 336(a0) - min a1, a5, t1 - sext.w t0, a3 - srai t1, a3, 32 - min a4, a1, t0 - ld a1, 344(a0) - min a2, a4, t1 - srai t0, a1, 32 - sext.w a5, a1 - min a3, a2, a5 - ld a2, 352(a0) - min a4, a3, t0 sext.w a1, a2 srai t0, a2, 32 min a5, a4, a1 - ld a1, 360(a0) + ld a1, 288(a0) min a3, a5, t0 sext.w a2, a1 srai t0, a1, 32 min a5, a3, a2 - ld a2, 368(a0) + ld a2, 296(a0) min a4, a5, t0 sext.w a1, a2 srai t0, a2, 32 - min a5, a4, a1 - ld a1, 376(a0) - min a3, a5, t0 + min a3, a4, a1 + ld a1, 304(a0) + min a5, a3, t0 sext.w a2, a1 srai t0, a1, 32 - min a5, a3, a2 - ld a2, 384(a0) - min a4, a5, t0 + min a4, a5, a2 + ld a2, 312(a0) + min a3, a4, t0 sext.w a1, a2 srai t0, a2, 32 - min a5, a4, a1 - ld a1, 392(a0) - min a3, a5, t0 + min a5, a3, a1 + ld a1, 320(a0) + min a4, a5, t0 + srai t1, a1, 32 + sext.w a3, a1 + min a5, a4, a3 + ld a3, 328(a0) + min a2, a5, t1 + sext.w t0, a3 + srai t1, a3, 32 + min a4, a2, t0 + ld a2, 336(a0) + min a1, a4, t1 + srai t0, a2, 32 + sext.w a5, a2 + min a4, a1, a5 + ld a1, 344(a0) + min a3, a4, t0 sext.w a2, a1 srai t0, a1, 32 min a5, a3, a2 - ld a2, 400(a0) + ld a2, 352(a0) min a4, a5, t0 - srai t1, a2, 32 sext.w a1, a2 + srai t0, a2, 32 min a5, a4, a1 + ld a1, 360(a0) + min a3, a5, t0 + sext.w t1, a1 + srai t0, a1, 32 + min a4, a3, t1 + ld a3, 368(a0) + min a2, a4, t0 + sext.w a5, a3 + srai t0, a3, 32 + min a4, a2, a5 + ld a2, 376(a0) + min a1, a4, t0 + sext.w a5, a2 + srai t0, a2, 32 + min a4, a1, a5 + ld a1, 384(a0) + min a3, a4, t0 + sext.w a5, a1 + srai t0, a1, 32 + min a4, a3, a5 + ld a3, 392(a0) + min a2, a4, t0 + sext.w a5, a3 + srai t0, a3, 32 + min a4, a2, a5 + ld a2, 400(a0) + min a1, a4, t0 + sext.w a5, a2 + srai t0, a2, 32 + min a4, a1, a5 ld a1, 408(a0) - min a3, a5, t1 -pcrel2044: + min a3, a4, t0 +pcrel2262: auipc s7, %pcrel_hi(cmmc_parallel_body_payload_2) - sd s5, %pcrel_lo(pcrel2044)(s7) + sd s5, %pcrel_lo(pcrel2262)(s7) srai a0, a1, 32 - sext.w t0, a1 + sext.w a5, a1 li a1, 1000 - min a4, a3, t0 + min a4, a3, a5 min a2, a4, a0 mv a0, zero sw a2, 8(s8) @@ -362,1174 +377,1276 @@ pcrel2044: jal cmmcParallelFor li a0, 1000 addiw s9, s9, 1 - bge s9, a0, label1365 + bge s9, a0, label1580 add s5, s5, s0 mv a1, zero - lui a3, 524288 + lui a4, 524288 mv a0, s5 - addiw a2, a3, -1 - j label1360 -label1381: - li a0, 92 - jal _sysy_stoptime - mv a0, s6 - jal putint - mv a0, zero -label1354: - ld ra, 0(sp) - ld s5, 8(sp) - ld s0, 16(sp) - ld s7, 24(sp) - ld s8, 32(sp) - ld s1, 40(sp) - ld s6, 48(sp) - ld s9, 56(sp) - ld s2, 64(sp) - ld s3, 72(sp) - ld s4, 80(sp) - addi sp, sp, 88 - ret -label1365: + addiw a2, a4, -1 + j label1575 +label1580: mv a2, s1 mv a0, zero mv a1, s1 mv a4, zero - j label1369 + j label1584 .p2align 2 -label1372: +label1596: addi a1, a1, 64 .p2align 2 -label1369: +label1584: mul t0, a4, s0 - li t5, 375 - li t6, 875 addiw a4, a4, 16 add a3, s1, t0 sh2add a5, a0, a3 - add t3, a3, s4 + add t2, a3, s0 lw t1, 0(a5) - add a5, a3, s0 + sh2add a5, a0, t2 subw t0, zero, t1 - sh2add t1, a0, a5 + add t2, a3, s3 sw t0, 0(a1) - lw t0, 0(t1) - add t1, a3, s3 - subw t2, zero, t0 - sh2add a5, a0, t1 - sh2add t1, a0, t3 - sw t2, 4(a1) - lw t2, 0(a5) - subw t0, zero, t2 - sw t0, 8(a1) - lw a5, 0(t1) - li t1, 125 - subw t0, zero, a5 - slli t2, t1, 7 + lw t1, 0(a5) + sh2add a5, a0, t2 + subw t0, zero, t1 + sw t0, 4(a1) + add t0, a3, s4 + lw t3, 0(a5) + sh2add t2, a0, t0 + subw t1, zero, t3 + li t0, 125 + sw t1, 8(a1) + lw a5, 0(t2) + slli t2, t0, 7 + subw t1, zero, a5 add a5, a3, t2 - sw t0, 12(a1) - sh2add t0, a0, a5 - lw t1, 0(t0) - li t0, 625 - subw t3, zero, t1 - slli t1, t0, 5 + sh2add t3, a0, a5 + sw t1, 12(a1) + lw t1, 0(t3) + li t3, 625 + subw t0, zero, t1 + slli t1, t3, 5 add a5, a3, t1 - sw t3, 16(a1) + sw t0, 16(a1) sh2add t4, a0, a5 + li a5, 375 lw t0, 0(t4) subw t3, zero, t0 - slli t0, t5, 6 + slli t0, a5, 6 add t4, a3, t0 sw t3, 20(a1) - sh2add a5, a0, t4 - lw t3, 0(a5) - slli a5, t6, 5 - subw t5, zero, t3 - add t4, a3, a5 - sw t5, 24(a1) sh2add t5, a0, t4 - slli t4, t2, 1 - lw t3, 0(t5) - subw t6, zero, t3 - add t3, a3, t4 - sh2add t5, a0, t3 - sw t6, 28(a1) - lw t2, 0(t5) + lw a5, 0(t5) + li t5, 875 + subw t3, zero, a5 + slli a5, t5, 5 + add t4, a3, a5 + sw t3, 24(a1) + sh2add t3, a0, t4 + lw t6, 0(t3) + slli t3, t2, 1 + subw t5, zero, t6 + add t6, a3, t3 + sh2add t4, a0, t6 + sw t5, 28(a1) li t5, 1125 - subw t4, zero, t2 + lw t2, 0(t4) + subw t3, zero, t2 slli t2, t5, 5 - sw t4, 32(a1) add t4, a3, t2 + sw t3, 32(a1) + slli t2, t1, 1 sh2add t3, a0, t4 lw t5, 0(t3) - slli t3, t1, 1 - subw t2, zero, t5 - add t5, a3, t3 - li t3, 1375 - sw t2, 36(a1) - sh2add t2, a0, t5 - lw t4, 0(t2) - slli t2, t3, 5 - subw t1, zero, t4 + subw t6, zero, t5 + add t5, a3, t2 + sh2add t4, a0, t5 + sw t6, 36(a1) + lw t3, 0(t4) + li t4, 1375 + subw t1, zero, t3 + slli t3, t4, 5 + add t2, a3, t3 sw t1, 40(a1) - add t1, a3, t2 - sh2add t5, a0, t1 - lw t3, 0(t5) - slli t5, t0, 1 - subw t4, zero, t3 - add t2, a3, t5 sh2add t1, a0, t2 - sw t4, 44(a1) - li t4, 1625 - lw t3, 0(t1) - slli t2, t4, 5 + slli t2, t0, 1 + lw t4, 0(t1) + add t1, a3, t2 + subw t3, zero, t4 + li t2, 1625 + sh2add t4, a0, t1 + slli t1, t2, 5 + sw t3, 44(a1) + lw t3, 0(t4) subw t0, zero, t3 - sw t0, 48(a1) - add t0, a3, t2 - sh2add t1, a0, t0 - lw t3, 0(t1) - slli t1, a5, 1 - subw t2, zero, t3 add t3, a3, t1 + sh2add t4, a0, t3 + sw t0, 48(a1) + slli t3, a5, 1 + lw t0, 0(t4) + add t1, a3, t3 + li t4, 1875 + subw t2, zero, t0 + slli t3, t4, 5 + sh2add t0, a0, t1 sw t2, 52(a1) - sh2add t2, a0, t3 - li t3, 1875 - lw t0, 0(t2) + lw a5, 0(t0) + add t0, a3, t3 + subw t2, zero, a5 + sh2add t1, a0, t0 + sw t2, 56(a1) + lw a5, 0(t1) + subw t2, zero, a5 + li a5, 992 + sw t2, 60(a1) + blt a4, a5, label1596 + li t0, 125 + lui t2, 17 + slli a5, t0, 9 + add t1, a3, a5 + sh2add a4, a0, t1 + lw t0, 0(a4) + addiw a4, t2, -1632 subw a5, zero, t0 - slli t0, t3, 5 - add t2, a3, t0 - sw a5, 56(a1) - li t0, 992 - sh2add t1, a0, t2 - lw t3, 0(t1) - subw a5, zero, t3 - sw a5, 60(a1) - blt a4, t0, label1372 - li t2, 125 - slli a5, t2, 9 + li t2, 1125 + add t0, a3, a4 + sh2add t1, a0, t0 + sw a5, 64(a1) + lw a5, 0(t1) + subw a4, zero, a5 + slli a5, t2, 6 + lui t2, 19 add t1, a3, a5 + sw a4, 68(a1) sh2add t0, a0, t1 lw a4, 0(t0) - lui t0, 17 subw a5, zero, a4 - addiw a4, t0, -1632 - add t2, a3, a4 - sw a5, 64(a1) - sh2add t1, a0, t2 - li t2, 1125 - lw t0, 0(t1) - slli a4, t2, 6 - subw a5, zero, t0 - lui t2, 21 + addiw a4, t2, -1824 + li t2, 625 add t1, a3, a4 + sw a5, 72(a1) sh2add t0, a0, t1 - sw a5, 68(a1) - lui t1, 19 lw a5, 0(t0) - addiw t0, t1, -1824 subw a4, zero, a5 - add a5, a3, t0 - sw a4, 72(a1) - sh2add a4, a0, a5 - lw t1, 0(a4) - li a4, 625 - subw t0, zero, t1 - slli a5, a4, 7 - add t1, a3, a5 - sw t0, 76(a1) - sh2add t0, a0, t1 - lw a4, 0(t0) + slli a5, t2, 7 + add t0, a3, a5 + sw a4, 76(a1) + sh2add t1, a0, t0 + lui t0, 21 + lw a4, 0(t1) subw a5, zero, a4 - addiw a4, t2, -2016 - li t2, 1375 + addiw a4, t0, -2016 add t1, a3, a4 sw a5, 80(a1) + sh2add a5, a0, t1 + lw t0, 0(a5) + li a5, 1375 + subw a4, zero, t0 + slli t2, a5, 6 + add t1, a3, t2 + sw a4, 84(a1) sh2add t0, a0, t1 - slli t1, t2, 6 lw a5, 0(t0) - add t0, a3, t1 + lui t0, 22 subw a4, zero, a5 - sh2add a5, a0, t0 - sw a4, 84(a1) - lw a4, 0(a5) - lui a5, 22 - subw t1, zero, a4 - addiw t0, a5, 1888 - sw t1, 88(a1) - add t1, a3, t0 - sh2add a4, a0, t1 + addiw t1, t0, 1888 + add a5, a3, t1 + sw a4, 88(a1) + sh2add a4, a0, a5 addiw a0, a0, 1 - lw a5, 0(a4) + lw t0, 0(a4) li a4, 1000 - subw a3, zero, a5 + subw a3, zero, t0 sw a3, 92(a1) - bge a0, a4, label1879 + bge a0, a4, label1588 add a2, a2, s0 mv a4, zero mv a1, a2 - j label1369 -label1879: - auipc a1, %pcrel_hi(cmmc_parallel_body_payload_3) + j label1584 +label1588: + auipc a0, %pcrel_hi(cmmc_parallel_body_payload_3) mv s4, zero mv s6, zero - addi s3, a1, %pcrel_lo(label1879) + addi s3, a0, %pcrel_lo(label1588) .p2align 2 -label1376: +label1589: auipc s5, %pcrel_hi(cmmc_parallel_body_payload_3) mv a0, zero li a1, 1000 - sw s6, %pcrel_lo(label1376)(s5) + sw s6, %pcrel_lo(label1589)(s5) sw s6, 4(s3) sd s1, 8(s3) mv a2, s2 jal cmmcParallelFor li a0, 1000 addiw s4, s4, 1 - lw s6, %pcrel_lo(label1376)(s5) - bge s4, a0, label1381 + lw s6, %pcrel_lo(label1589)(s5) + bge s4, a0, label1593 add s1, s1, s0 - j label1376 + j label1589 +label1593: + li a0, 92 + jal _sysy_stoptime + mv a0, s6 + jal putint + mv a0, zero + j label1599 .p2align 2 cmmc_parallel_body_0: - addi sp, sp, -24 - mv t5, a1 -pcrel772: - auipc a4, %pcrel_hi(b) - li a5, 125 - sd s1, 0(sp) - addi a2, a4, %pcrel_lo(pcrel772) - slli a3, a5, 5 - sd s0, 8(sp) - sh2add t2, a3, a3 - sh1add t0, a3, a3 - slli a5, a3, 1 - mul a1, a0, a3 - sd s2, 16(sp) - slli t3, t0, 1 - slli t1, a5, 1 - add t4, a2, a1 -pcrel773: + addi sp, sp, -32 + mv t4, a1 +pcrel1081: + auipc a2, %pcrel_hi(b) +pcrel1082: auipc a1, %pcrel_hi(a) - addi a4, a1, %pcrel_lo(pcrel773) - mv a1, t4 - mv t6, zero - j label5 + sd s0, 0(sp) + addi t3, a2, %pcrel_lo(pcrel1081) + addi t5, a1, %pcrel_lo(pcrel1082) + li a2, 125 + sd s1, 8(sp) + slli a3, a2, 5 + sd s2, 16(sp) + sh2add t1, a3, a3 + sh1add a5, a3, a3 + slli a4, a3, 1 + sd s3, 24(sp) + slli t2, a5, 1 + slli t0, a4, 1 + j label2 .p2align 2 -label9: +label8: li a6, 125 - lui s0, 63 + lui s2, 63 slli t6, a6, 11 + add a7, a1, t6 + sh2add s0, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, 1952 - lui s0, 64 - add a7, a2, a6 - sw t6, 256(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, 1856 - lui s0, 66 - add a7, a2, t6 - sw a6, 260(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 65 - sw t6, 264(a1) - addiw t6, a6, 1760 + lw a6, 0(s0) + addiw t6, s2, 1952 + sh2add s1, a0, a7 + lui s2, 64 + add a7, a1, t6 + sw a6, 0(s1) + sh2add s0, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, 1664 - lui s0, 67 - add a7, a2, a6 - sw t6, 268(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, 1568 + lw a6, 0(s0) + addiw t6, s2, 1856 + sh2add s1, a0, a7 + lui s2, 65 + add a7, a1, t6 + sw a6, 0(s1) + sh2add s0, a0, a7 add a7, a2, t6 - sw a6, 272(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 68 - addiw a7, a6, 1472 - sw t6, 276(a1) - add t6, a2, a7 - sh2add a6, a0, t6 - lui t6, 69 - lw a7, 0(a6) - addiw a6, t6, 1376 - sw a7, 280(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - li a7, 1125 - lw a6, 0(t6) - slli t6, a7, 8 - add s0, a2, t6 - sw a6, 284(a1) + lw a6, 0(s0) + addiw t6, s2, 1760 + sh2add s1, a0, a7 + lui s2, 66 + add a7, a1, t6 + sw a6, 0(s1) + sh2add s0, a0, a7 + add s1, a2, t6 + lw a6, 0(s0) + addiw t6, s2, 1664 + sh2add a7, a0, s1 + lui s2, 67 + add s0, a1, t6 + add s1, a2, t6 + sw a6, 0(a7) + addiw t6, s2, 1568 sh2add a7, a0, s0 + lui s2, 68 + lw a6, 0(a7) + sh2add a7, a0, s1 + add s1, a2, t6 + sw a6, 0(a7) + add a7, a1, t6 + addiw t6, s2, 1472 + sh2add s0, a0, a7 + li s2, 1125 + sh2add a7, a0, s1 + lw a6, 0(s0) + add s1, a2, t6 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s0, a0, a7 + sh2add a7, a0, s1 + lw a6, 0(s0) + lui s0, 69 + addiw t6, s0, 1376 + sw a6, 0(a7) + add s0, a2, t6 + add s1, a1, t6 + slli t6, s2, 8 + sh2add a7, a0, s1 + lui s2, 73 lw a6, 0(a7) + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 + add a7, a2, t6 + lw a6, 0(s1) + sh2add s0, a0, a7 lui a7, 71 addiw t6, a7, 1184 - sw a6, 288(a1) + sw a6, 0(s0) + add s1, a1, t6 add s0, a2, t6 - lui t6, 72 - sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, t6, 1088 - sw a7, 292(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - lui a7, 73 - lw a6, 0(t6) - addiw t6, a7, 992 - add s0, a2, t6 - sw a6, 296(a1) - lui t6, 74 - sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, t6, 896 - sw a7, 300(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - lui a7, 75 - lw a6, 0(t6) - addiw t6, a7, 800 - add s0, a2, t6 - sw a6, 304(a1) + sh2add a7, a0, s1 + lui s1, 72 + lw a6, 0(a7) + addiw t6, s1, 1088 + sh2add a7, a0, s0 + add s1, a2, t6 + sw a6, 0(a7) + add a7, a1, t6 + addiw t6, s2, 992 + sh2add s0, a0, a7 + lui s2, 75 + sh2add a7, a0, s1 + lw a6, 0(s0) + add s1, a2, t6 + add s0, a1, t6 + sw a6, 0(a7) sh2add a7, a0, s0 + lui s0, 74 lw a6, 0(a7) + addiw t6, s0, 896 + sh2add a7, a0, s1 + add s0, a2, t6 + add s1, a1, t6 + sw a6, 0(a7) + addiw t6, s2, 800 + sh2add a7, a0, s1 + lui s2, 77 + lw a6, 0(a7) + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 + add a7, a2, t6 + lw a6, 0(s1) + sh2add s0, a0, a7 lui a7, 76 addiw t6, a7, 704 - sw a6, 308(a1) + sw a6, 0(s0) + add s1, a1, t6 add s0, a2, t6 - lui t6, 77 - sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, t6, 608 - sw a7, 312(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - li a7, 625 - lw a6, 0(t6) - slli t6, a7, 9 - add s0, a2, t6 - sw a6, 316(a1) - lui t6, 79 - sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, t6, 416 - sw a7, 320(a1) - add a7, a2, a6 - sh2add t6, a0, a7 - lui a7, 80 - lw a6, 0(t6) - addiw t6, a7, 320 + sh2add a7, a0, s1 + addiw t6, s2, 608 + lw a6, 0(a7) + li s2, 625 + add s1, a1, t6 + sh2add a7, a0, s0 add s0, a2, t6 - sw a6, 324(a1) + sw a6, 0(a7) + slli t6, s2, 9 + sh2add a7, a0, s1 + lui s2, 79 + add s1, a1, t6 + lw a6, 0(a7) sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, 416 + sh2add a7, a0, s1 + lui s2, 80 + add s1, a1, t6 lw a6, 0(a7) - lui a7, 81 - addiw t6, a7, 224 - sw a6, 328(a1) + sh2add a7, a0, s0 add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, 320 + sh2add a7, a0, s1 + lui s2, 81 + add s1, a1, t6 + lw a6, 0(a7) sh2add a7, a0, s0 - lui s0, 82 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, 224 + sh2add a7, a0, s1 + lui s2, 83 lw a6, 0(a7) - addiw t6, s0, 128 - lui s0, 83 - add a7, a2, t6 - sw a6, 332(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, 32 - lui s0, 84 - add a7, a2, a6 - sw t6, 336(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -64 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sw a6, 340(a1) - sh2add a6, a0, a7 - lui a7, 85 - lw t6, 0(a6) - addiw a6, a7, -160 - sw t6, 344(a1) - add t6, a2, a6 - li a6, 1375 - sh2add s0, a0, t6 - slli t6, a6, 8 - lw a7, 0(s0) + lw a6, 0(s1) + sh2add s0, a0, a7 + lui a7, 82 + addiw t6, a7, 128 + sw a6, 0(s0) + add s1, a1, t6 add s0, a2, t6 - sw a7, 348(a1) - sh2add a7, a0, s0 - lui s0, 87 + sh2add a7, a0, s1 + addiw t6, s2, 32 lw a6, 0(a7) - addiw t6, s0, -352 - lui s0, 88 - add a7, a2, t6 - sw a6, 352(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, -448 - lui s0, 89 - add a7, a2, a6 - sw t6, 356(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -544 - lui s0, 91 - add a7, a2, t6 - sw a6, 360(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 90 - sw t6, 364(a1) - addiw t6, a6, -640 - add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, -736 - lui s0, 92 - add a7, a2, a6 - sw t6, 368(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -832 - li s0, 375 - add a7, a2, t6 - sw a6, 372(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 93 - sw t6, 376(a1) - addiw t6, a6, -928 + lui s2, 85 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - slli a6, s0, 10 - lui s0, 95 - add a7, a2, a6 - sw t6, 380(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -1120 - lui s0, 96 + lw a6, 0(s1) + sh2add s0, a0, a7 + lui a7, 84 + addiw t6, a7, -64 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -160 + lw a6, 0(a7) + lui s2, 87 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sw a6, 384(a1) - sh2add a6, a0, a7 - lw t6, 0(a6) - addiw a6, s0, -1216 - lui s0, 97 - add a7, a2, a6 - sw t6, 388(a1) - sh2add t6, a0, a7 - lw a6, 0(t6) - addiw t6, s0, -1312 + lw a6, 0(s1) + sh2add s0, a0, a7 + li a7, 1375 + slli t6, a7, 8 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -352 + lw a6, 0(a7) + lui s2, 89 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sw a6, 392(a1) - sh2add a6, a0, a7 - lui a7, 98 - lw t6, 0(a6) - addiw a6, a7, -1408 - add s0, a2, a6 - sw t6, 396(a1) - lui a6, 99 - sh2add t6, a0, s0 - lw a7, 0(t6) - addiw t6, a6, -1504 - sw a7, 400(a1) + lw a6, 0(s1) + sh2add s0, a0, a7 + lui a7, 88 + addiw t6, a7, -448 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -544 + lw a6, 0(a7) + lui s2, 90 + add s1, a1, t6 + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, -640 + sh2add a7, a0, s1 + lui s2, 91 + add s1, a1, t6 + lw a6, 0(a7) + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, -736 + sh2add a7, a0, s1 + lui s2, 93 + lw a6, 0(a7) + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lw t6, 0(a6) - lui a6, 100 - sw t6, 404(a1) - addiw t6, a6, -1600 + lw a6, 0(s1) + sh2add s0, a0, a7 + lui a7, 92 + addiw t6, a7, -832 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -928 + lw a6, 0(a7) + lui s2, 95 + sh2add a7, a0, s0 + sw a6, 0(a7) + add a7, a1, t6 + sh2add s1, a0, a7 add a7, a2, t6 - sh2add a6, a0, a7 - lui a7, 101 - lw t6, 0(a6) - addiw a6, a7, -1696 - sw t6, 408(a1) - add t6, a2, a6 - sh2add a7, a0, t6 + lw a6, 0(s1) + sh2add s0, a0, a7 + li a7, 375 + slli t6, a7, 10 + sw a6, 0(s0) + add s1, a1, t6 + add s0, a2, t6 + sh2add a7, a0, s1 + addiw t6, s2, -1120 + lw a6, 0(a7) + lui s2, 96 + add s1, a1, t6 + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, -1216 + sh2add a7, a0, s1 + lui s2, 99 + add s1, a1, t6 + lw a6, 0(a7) + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + sh2add a7, a0, s1 + lui s1, 97 + lw a6, 0(a7) + addiw t6, s1, -1312 + sh2add a7, a0, s0 + add s1, a2, t6 + add s0, a1, t6 + sw a6, 0(a7) + sh2add a7, a0, s0 + lui s0, 98 + lw a6, 0(a7) + addiw t6, s0, -1408 + sh2add a7, a0, s1 + add s0, a2, t6 + add s1, a1, t6 + sw a6, 0(a7) + addiw t6, s2, -1504 + sh2add a7, a0, s1 + lui s2, 100 + add s1, a1, t6 + lw a6, 0(a7) + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + addiw t6, s2, -1600 + sh2add a7, a0, s1 + add s1, a1, t6 + lw a6, 0(a7) + sh2add a7, a0, s0 + add s0, a2, t6 + sw a6, 0(a7) + sh2add a7, a0, s1 + lui s1, 101 + lw a6, 0(a7) + addiw t6, s1, -1696 + sh2add a7, a0, s0 + add s1, a2, t6 + add s0, a1, t6 + sw a6, 0(a7) + sh2add a1, a0, s1 + sh2add a7, a0, s0 addiw a0, a0, 1 lw a6, 0(a7) - sw a6, 412(a1) - ble t5, a0, label11 - add t4, t4, a3 + sw a6, 0(a1) + ble t4, a0, label9 +.p2align 2 +label2: + mv a1, t5 mv t6, zero - mv a1, t4 .p2align 2 -label5: - mul a6, t6, a3 - li s2, 875 +label4: + mul a7, t6, a3 addiw t6, t6, 64 - add a2, a4, a6 - sh2add a7, a0, a2 - add a6, a2, a3 - sh2add s0, a0, a6 - add a6, a2, a5 - lw s1, 0(a7) - sw s1, 0(a1) - lw a7, 0(s0) - sw a7, 4(a1) - sh2add a7, a0, a6 - add a6, a2, t0 - lw s0, 0(a7) - sh2add a7, a0, a6 - sw s0, 8(a1) + add a2, t3, a7 + sh2add a6, a0, a2 + add s1, a2, a3 + sh2add a7, a0, a1 lw s0, 0(a7) - add a7, a2, t1 - sh2add a6, a0, a7 - sw s0, 12(a1) - add a7, a2, t2 - lw s0, 0(a6) - sh2add s1, a0, a7 - sw s0, 16(a1) - add s0, a2, t3 - lw a6, 0(s1) - sh2add a7, a0, s0 - li s1, 875 - sw a6, 20(a1) - lw a6, 0(a7) - slli a7, s1, 5 - li s1, 1125 - sw a6, 24(a1) - add a6, a2, a7 + sh2add a7, a0, s1 + add s1, a2, a4 + sw s0, 0(a6) + add a6, a1, a3 sh2add s0, a0, a6 - li a6, 125 + sh2add a6, a0, s1 + lw s2, 0(s0) + sw s2, 0(a7) + add s2, a1, a4 + sh2add s0, a0, s2 + add s2, a2, a5 lw a7, 0(s0) - sw a7, 28(a1) - slli a7, a6, 8 + add s0, a1, a5 + sh2add s1, a0, s0 + sw a7, 0(a6) + add s0, a2, t0 + lw a6, 0(s1) + sh2add a7, a0, s2 + add s2, a2, t1 + sw a6, 0(a7) + add a7, a1, t0 + sh2add a6, a0, s0 + sh2add s1, a0, a7 + sh2add a7, a0, s2 + lw s0, 0(s1) + li s2, 875 + sw s0, 0(a6) + add s0, a2, t2 + add a6, a1, t1 + sh2add s3, a0, a6 + sh2add a6, a0, s0 + lw s1, 0(s3) + sw s1, 0(a7) + add s1, a1, t2 + sh2add a7, a0, s1 + lw s0, 0(a7) + slli a7, s2, 5 + add s1, a2, a7 + sw s0, 0(a6) + add s0, a1, a7 + sh2add a6, a0, s1 + sh2add s2, a0, s0 + li s1, 125 + lw a7, 0(s2) + sw a7, 0(a6) + slli a7, s1, 8 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - sw a7, 32(a1) + li s1, 1125 + lw a7, 0(s2) + sw a7, 0(a6) slli a7, s1, 5 - add a6, a2, a7 - sh2add s0, a0, a6 - li a6, 625 - lw a7, 0(s0) - sw a7, 36(a1) - slli a7, a6, 6 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - li a6, 1375 - slli s1, a6, 5 - sw a7, 40(a1) - add a7, a2, s1 - li s1, 375 - sh2add a6, a0, a7 - lw s0, 0(a6) - slli a6, s1, 7 - li s1, 1625 - add a7, a2, a6 - sw s0, 44(a1) - sh2add s0, a0, a7 - lw a6, 0(s0) - sw a6, 48(a1) - slli a6, s1, 5 - add a7, a2, a6 - sh2add s0, a0, a7 - slli a7, s2, 6 - lw a6, 0(s0) - li s2, 125 + lw a7, 0(s2) + li s0, 625 + sw a7, 0(a6) + slli a7, s0, 6 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + li s0, 1375 + lw a7, 0(s1) + sw a7, 0(a6) + slli a7, s0, 5 + add s0, a1, a7 add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + li s0, 375 + lw a7, 0(s2) + sw a7, 0(a6) + slli a7, s0, 7 + add s1, a1, a7 + add s2, a2, a7 sh2add s0, a0, s1 - sw a6, 52(a1) - li s1, 1875 - lw a6, 0(s0) + sh2add a6, a0, s2 + li s1, 1625 + lw a7, 0(s0) + sw a7, 0(a6) slli a7, s1, 5 - sw a6, 56(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - slli a6, s2, 9 + add s2, a1, a7 + add s0, a2, a7 + sh2add s1, a0, s2 + sh2add a6, a0, s0 + lw a7, 0(s1) + li s0, 875 + sw a7, 0(a6) + slli a7, s0, 6 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + li s0, 1875 + lw a7, 0(s1) + sw a7, 0(a6) + slli a7, s0, 5 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lw a7, 0(s0) - lui s2, 17 - add s1, a2, a6 + li s2, 125 + sw a7, 0(a6) + slli a7, s2, 9 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + lui s1, 17 + sw a7, 0(a6) + addiw a7, s1, -1632 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + li s0, 1125 + lw a7, 0(s1) + sw a7, 0(a6) + slli a7, s0, 6 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lui s0, 19 + lw a7, 0(s2) + sw a7, 0(a6) + addiw a7, s0, -1824 + add s1, a1, a7 + add s2, a2, a7 sh2add s0, a0, s1 - sw a7, 60(a1) - addiw s1, s2, -1632 + sh2add a6, a0, s2 + li s1, 625 lw a7, 0(s0) - lui s2, 26 - add a6, a2, s1 - sw a7, 64(a1) - sh2add a7, a0, a6 - lw s0, 0(a7) - li a7, 1125 - slli a6, a7, 6 - sw s0, 68(a1) - add s0, a2, a6 - sh2add a7, a0, s0 - lw a6, 0(a7) - lui a7, 19 - sw a6, 72(a1) - addiw a6, a7, -1824 - add s0, a2, a6 - sh2add a7, a0, s0 - lw a6, 0(a7) - li a7, 625 - slli s1, a7, 7 - sw a6, 76(a1) - add a6, a2, s1 - sh2add a7, a0, a6 - lw s0, 0(a7) - lui a7, 21 - addiw a6, a7, -2016 - sw s0, 80(a1) - add s0, a2, a6 - li a6, 1375 + sw a7, 0(a6) + slli a7, s1, 7 + add s0, a1, a7 + add s2, a2, a7 sh2add s1, a0, s0 + sh2add a6, a0, s2 + lui s0, 21 lw a7, 0(s1) - li s1, 375 - sw a7, 84(a1) - slli a7, a6, 6 - add s0, a2, a7 - sh2add a6, a0, s0 - lw a7, 0(a6) - lui a6, 22 - sw a7, 88(a1) - addiw a7, a6, 1888 + sw a7, 0(a6) + addiw a7, s0, -2016 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + li s2, 1375 + lw a7, 0(s0) + sw a7, 0(a6) + slli a7, s2, 6 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + lui s1, 22 + sw a7, 0(a6) + addiw a7, s1, 1888 + add s2, a1, a7 add s0, a2, a7 + sh2add s1, a0, s2 sh2add a6, a0, s0 - lw a7, 0(a6) - slli a6, s1, 8 - lui s1, 24 - add s0, a2, a6 - sw a7, 92(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - sw a6, 96(a1) - addiw a6, s1, 1696 - add a7, a2, a6 - sh2add s0, a0, a7 - li a7, 1625 - lw a6, 0(s0) - sw a6, 100(a1) - slli a6, a7, 6 - add s0, a2, a6 - addiw a6, s2, 1504 + lw a7, 0(s1) + li s0, 375 + sw a7, 0(a6) + slli a7, s0, 8 + add s0, a1, a7 + add s2, a2, a7 sh2add s1, a0, s0 - li s2, 125 - add s0, a2, a6 + sh2add a6, a0, s2 + lui s0, 24 lw a7, 0(s1) - sw a7, 104(a1) - sh2add a7, a0, s0 - li s0, 875 - lw a6, 0(a7) - slli s1, s0, 7 - add a7, a2, s1 - lui s1, 28 - sw a6, 108(a1) - sh2add a6, a0, a7 - lw s0, 0(a6) - addiw a6, s1, 1312 - li s1, 1875 - add a7, a2, a6 - sw s0, 112(a1) - sh2add s0, a0, a7 - slli a7, s1, 6 - lw a6, 0(s0) + sw a7, 0(a6) + addiw a7, s0, 1696 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + li s0, 1625 + lw a7, 0(s2) + sw a7, 0(a6) + slli a7, s0, 6 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lui s0, 26 + lw a7, 0(s2) + sw a7, 0(a6) + addiw a7, s0, 1504 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + li s1, 875 + sw a7, 0(a6) + slli a7, s1, 7 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + lui s0, 28 + lw a7, 0(s1) + sw a7, 0(a6) + addiw a7, s0, 1312 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + li s0, 1875 + lw a7, 0(s1) + sw a7, 0(a6) + slli a7, s0, 6 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 30 + lw a7, 0(s0) + sw a7, 0(a6) + addiw a7, s1, 1120 + add s2, a1, a7 add s0, a2, a7 - sw a6, 116(a1) + sh2add s1, a0, s2 sh2add a6, a0, s0 - addiw s0, s1, 1120 - lw a7, 0(a6) - add a6, a2, s0 - sw a7, 120(a1) - sh2add a7, a0, a6 - lw s1, 0(a7) - slli a7, s2, 10 - li s2, 1125 - add a6, a2, a7 - sw s1, 124(a1) - sh2add s0, a0, a6 + lw a7, 0(s1) + li s0, 125 + sw a7, 0(a6) + slli a7, s0, 10 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 32 lw a7, 0(s0) - addiw a6, s1, 928 + sw a7, 0(a6) + addiw a7, s1, 928 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 33 - sw a7, 128(a1) - add a7, a2, a6 - sh2add s0, a0, a7 - lw a6, 0(s0) - sw a6, 132(a1) - addiw a6, s1, 832 - add a7, a2, a6 - sh2add s0, a0, a7 - lui a7, 34 - lw a6, 0(s0) - sw a6, 136(a1) - addiw a6, a7, 736 - add s1, a2, a6 - slli a6, s2, 7 + lw a7, 0(s0) + sw a7, 0(a6) + addiw a7, s1, 832 + add s1, a1, a7 + add s2, a2, a7 sh2add s0, a0, s1 - lui s2, 60 - add s1, a2, a6 + sh2add a6, a0, s2 + lui s1, 34 lw a7, 0(s0) + sw a7, 0(a6) + addiw a7, s1, 736 + add s1, a1, a7 + add s2, a2, a7 sh2add s0, a0, s1 - lui s1, 36 - sw a7, 140(a1) - addiw a6, s1, 544 + sh2add a6, a0, s2 + li s1, 1125 lw a7, 0(s0) - lui s1, 37 - sw a7, 144(a1) - add a7, a2, a6 - sh2add s0, a0, a7 - lw a6, 0(s0) - sw a6, 148(a1) - addiw a6, s1, 448 + sw a7, 0(a6) + slli a7, s1, 7 + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lui s1, 36 + lw a7, 0(s2) + sw a7, 0(a6) + addiw a7, s1, 544 + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lw a7, 0(s2) + lui s0, 37 + sw a7, 0(a6) + addiw a7, s0, 448 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 38 - add a7, a2, a6 - sh2add s0, a0, a7 + lw a7, 0(s0) + sw a7, 0(a6) addiw a7, s1, 352 - lw a6, 0(s0) - li s1, 625 - sw a6, 152(a1) - add a6, a2, a7 - sh2add s0, a0, a6 + add s2, a1, a7 + add s0, a2, a7 + sh2add s1, a0, s2 + sh2add a6, a0, s0 + lw a7, 0(s1) + li s0, 625 + sw a7, 0(a6) + slli a7, s0, 8 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 lw a7, 0(s0) - sw a7, 156(a1) - slli a7, s1, 8 lui s1, 40 - add a6, a2, a7 - sh2add s0, a0, a6 - lw a7, 0(s0) - sw a7, 160(a1) + sw a7, 0(a6) addiw a7, s1, 160 - lui s1, 41 - add a6, a2, a7 - sh2add s0, a0, a6 - addiw a6, s1, 64 + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lw a7, 0(s2) + lui s0, 41 + sw a7, 0(a6) + addiw a7, s0, 64 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + lw a7, 0(s1) + lui s2, 42 + sw a7, 0(a6) + addiw a7, s2, -32 + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lw a7, 0(s2) + li s0, 1375 + sw a7, 0(a6) + slli a7, s0, 7 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lw a7, 0(s0) - lui s1, 42 - add s0, a2, a6 - sw a7, 164(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - sw a6, 168(a1) - addiw a6, s1, -32 - li s1, 1375 - add a7, a2, a6 - sh2add s0, a0, a7 - lw a6, 0(s0) - sw a6, 172(a1) - slli a6, s1, 7 - lui s1, 44 - add a7, a2, a6 - sh2add s0, a0, a7 - addiw a7, s1, -224 - lw a6, 0(s0) - lui s1, 46 - sw a6, 176(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - lui a6, 45 + lui s2, 44 + sw a7, 0(a6) + addiw a7, s2, -224 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 lw a7, 0(s0) - sw a7, 180(a1) - addiw a7, a6, -320 + lui s1, 45 + sw a7, 0(a6) + addiw a7, s1, -320 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + lw a7, 0(s1) + lui s2, 46 + sw a7, 0(a6) + addiw a7, s2, -416 + add s2, a1, a7 add s0, a2, a7 + sh2add s1, a0, s2 sh2add a6, a0, s0 - lw a7, 0(a6) - sw a7, 184(a1) - addiw a7, s1, -416 - lui s1, 48 - add a6, a2, a7 - sh2add s0, a0, a6 - li a6, 375 + lw a7, 0(s1) + li s0, 375 + sw a7, 0(a6) + slli a7, s0, 9 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lw a7, 0(s0) - sw a7, 188(a1) - slli a7, a6, 9 + lui s2, 48 + sw a7, 0(a6) + addiw a7, s2, -608 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lw a7, 0(s2) + lui s1, 49 + sw a7, 0(a6) + addiw a7, s1, -704 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - addiw a6, s1, -608 lui s1, 50 - add s0, a2, a6 - sw a7, 192(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - lui a7, 49 - sw a6, 196(a1) - addiw a6, a7, -704 - add s0, a2, a6 - sh2add a7, a0, s0 - lw a6, 0(a7) + lw a7, 0(s2) + sw a7, 0(a6) addiw a7, s1, -800 - li s1, 1625 - sw a6, 200(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - slli a6, s1, 7 - lw a7, 0(s0) + add s1, a1, a7 + add s0, a2, a7 + sh2add s2, a0, s1 + sh2add a6, a0, s0 + lw a7, 0(s2) + li s0, 1625 + sw a7, 0(a6) + slli a7, s0, 7 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 52 - add s0, a2, a6 - sw a7, 204(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - sw a6, 208(a1) - addiw a6, s1, -992 - lui s1, 53 - add a7, a2, a6 - sh2add s0, a0, a7 - addiw a7, s1, -1088 - lw a6, 0(s0) - lui s1, 54 - sw a6, 212(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - addiw a6, s1, -1184 lw a7, 0(s0) - li s1, 875 - sw a7, 216(a1) - add a7, a2, a6 - sh2add s0, a0, a7 - slli a7, s1, 8 - lw a6, 0(s0) - lui s1, 57 + sw a7, 0(a6) + addiw a7, s1, -992 + add s2, a1, a7 add s0, a2, a7 - sw a6, 220(a1) + sh2add s1, a0, s2 sh2add a6, a0, s0 - lw a7, 0(a6) - lui a6, 56 - sw a7, 224(a1) - addiw a7, a6, -1376 + lw a7, 0(s1) + lui s0, 53 + sw a7, 0(a6) + addiw a7, s0, -1088 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lui s2, 54 + lw a7, 0(s0) + sw a7, 0(a6) + addiw a7, s2, -1184 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lw a7, 0(s2) + li s1, 875 + sw a7, 0(a6) + slli a7, s1, 8 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - sw a7, 228(a1) + lw a7, 0(s2) + lui s0, 56 + sw a7, 0(a6) + addiw a7, s0, -1376 + add s0, a1, a7 + add s1, a2, a7 + sh2add s2, a0, s0 + sh2add a6, a0, s1 + lw a7, 0(s2) + lui s1, 57 + sw a7, 0(a6) addiw a7, s1, -1472 - li s1, 1875 - add a6, a2, a7 - sh2add s0, a0, a6 - lui a6, 58 - lw a7, 0(s0) - sw a7, 232(a1) - addiw a7, a6, -1568 + add s1, a1, a7 add s0, a2, a7 + sh2add s2, a0, s1 sh2add a6, a0, s0 - lw a7, 0(a6) - slli a6, s1, 7 - sw a7, 236(a1) - add a7, a2, a6 - sh2add s0, a0, a7 - addiw a7, s2, -1760 - lw a6, 0(s0) - lui s2, 61 - add s0, a2, a7 - addiw a7, s2, -1856 + lw a7, 0(s2) + lui s0, 58 + sw a7, 0(a6) + addiw a7, s0, -1568 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + li s1, 1875 + sw a7, 0(a6) + slli a7, s1, 7 + add s0, a1, a7 + add s2, a2, a7 sh2add s1, a0, s0 - sw a6, 240(a1) - lw a6, 0(s1) + sh2add a6, a0, s2 + lui s0, 60 + lw a7, 0(s1) + sw a7, 0(a6) + addiw a7, s0, -1760 + add s2, a1, a7 + add s1, a2, a7 + sh2add s0, a0, s2 + sh2add a6, a0, s1 + lw a7, 0(s0) + lui s1, 61 + sw a7, 0(a6) + addiw a7, s1, -1856 + add s1, a1, a7 + add s2, a2, a7 + sh2add s0, a0, s1 + sh2add a6, a0, s2 lui s1, 62 - sw a6, 244(a1) - add a6, a2, a7 - sh2add s0, a0, a6 - addiw a6, s1, -1952 lw a7, 0(s0) - add s0, a2, a6 - sw a7, 248(a1) - sh2add a7, a0, s0 - lw a6, 0(a7) - li a7, 960 - sw a6, 252(a1) - bge t6, a7, label9 - addi a1, a1, 256 - j label5 -label11: - ld s1, 0(sp) - ld s0, 8(sp) + sw a7, 0(a6) + addiw a7, s1, -1952 + add s0, a1, a7 + add s2, a2, a7 + sh2add s1, a0, s0 + sh2add a6, a0, s2 + lw a7, 0(s1) + sw a7, 0(a6) + li a6, 960 + bge t6, a6, label8 + li a6, 125 + slli a2, a6, 11 + add a1, a1, a2 + j label4 +label9: + ld s0, 0(sp) + ld s1, 8(sp) ld s2, 16(sp) - addi sp, sp, 24 + ld s3, 24(sp) + addi sp, sp, 32 ret .p2align 2 cmmc_parallel_body_1: - addi sp, sp, -80 - mv t4, a1 -pcrel1086: + addi sp, sp, -56 + mv t1, a1 +pcrel1303: auipc a4, %pcrel_hi(c) li a5, 125 - mv t6, a0 - addi a3, a4, %pcrel_lo(pcrel1086) - slli a2, a5, 5 + mv t4, a0 + addi a2, a4, %pcrel_lo(pcrel1303) + slli a3, a5, 5 sd s0, 0(sp) - sh2add t1, a2, a2 - sh1add a5, a2, a2 - slli a4, a2, 1 - mul a1, a0, a2 + li a5, 1000 + mul a1, a0, a3 sd s5, 8(sp) - slli t0, a4, 1 -pcrel1087: - auipc a0, %pcrel_hi(a) - add t3, a3, a1 - addi t5, a0, %pcrel_lo(pcrel1087) +pcrel1304: + auipc a0, %pcrel_hi(b) + add a4, a2, a1 sd s1, 16(sp) -pcrel1088: - auipc a1, %pcrel_hi(b) + addi t2, a0, %pcrel_lo(pcrel1304) +pcrel1305: + auipc a1, %pcrel_hi(a) sd s6, 24(sp) - addi a3, a1, %pcrel_lo(pcrel1088) - sd s3, 32(sp) - sd s4, 40(sp) + addi t3, a1, %pcrel_lo(pcrel1305) + sd s4, 32(sp) + li a1, 992 + sd s3, 40(sp) sd s2, 48(sp) - sd s7, 56(sp) - sd s9, 64(sp) - sd s8, 72(sp) - mul a1, t6, a2 - mv a6, t3 - mv a0, zero - add t2, t5, a1 - mv a1, t2 - mv s0, zero - mv s1, zero - j label781 + mul a0, t4, a3 + mv a2, t2 + mv t5, zero + add t0, t3, a0 + mv a0, t0 + mv a6, zero + mv a7, zero + j label1090 .p2align 2 -label786: - li s0, 125 - lui s6, 17 - slli s4, s0, 9 - lw s0, 64(a1) - add s2, a7, s4 - sh2add s3, a0, s2 - lw s4, 0(s3) - mulw s5, s0, s4 - addiw s4, s6, -1632 - addw s2, s1, s5 - li s6, 1125 - add s0, a7, s4 - lw s1, 68(a1) - sh2add s3, a0, s0 - lw s4, 0(s3) - mulw s5, s1, s4 - slli s4, s6, 6 - addw s0, s2, s5 - lui s6, 19 - add s1, a7, s4 - lw s2, 72(a1) - sh2add s3, a0, s1 - lw s4, 0(s3) - mulw s5, s2, s4 - lw s2, 76(a1) - addiw s4, s6, -1824 - addw s1, s0, s5 - li s6, 625 - add s0, a7, s4 - sh2add s3, a0, s0 - lw s5, 0(s3) +label1095: + lw a6, 64(a0) + lw s3, 64(t6) + lw s1, 68(a0) + mulw s2, a6, s3 + lw s3, 68(t6) + addw s0, a7, s2 + lw s2, 72(a0) + mulw a7, s1, s3 + lw s3, 72(t6) + addw a6, s0, a7 + lw s0, 76(a0) + mulw s1, s2, s3 + lw s2, 76(t6) + addw a7, a6, s1 + lw s1, 80(a0) + lw s3, 80(t6) + mulw s4, s0, s2 + lw s0, 84(a0) + addw a6, a7, s4 + mulw s2, s1, s3 + lw s3, 84(t6) + addw a7, a6, s2 + lw s1, 88(a0) + mulw s2, s0, s3 + lw s3, 88(t6) + addw a6, a7, s2 + lw s0, 92(a0) + mulw s2, s1, s3 + lw s1, 92(t6) + addw a7, a6, s2 + sh2add t6, t5, a4 + addiw t5, t5, 1 + mulw a6, s0, s1 + addw a0, a7, a6 + sw a0, 0(t6) + bge t5, a5, label1266 + add a2, a2, a3 + mv a0, t0 + mv a6, zero + mv a7, zero +.p2align 2 +label1090: + sh2add t6, a6, a2 + lw s0, 0(a0) + addiw a6, a6, 16 + lw s1, 0(t6) + lw s4, 4(a0) + lw s5, 4(t6) + mulw s6, s0, s1 + mulw s3, s4, s5 + lw s4, 8(a0) + addw s2, s3, s6 + lw s5, 8(t6) + lw s3, 12(a0) + mulw s1, s4, s5 + lw s5, 12(t6) + addw s0, s2, s1 + lw s2, 16(a0) + mulw s4, s3, s5 + lw s5, 16(t6) + addw s1, s0, s4 + lw s3, 20(a0) mulw s4, s2, s5 - lw s2, 80(a1) - slli s5, s6, 7 + lw s5, 20(t6) addw s0, s1, s4 - lui s6, 21 - add s1, a7, s5 - sh2add s3, a0, s1 - lw s4, 0(s3) + lw s4, 24(a0) + mulw s1, s3, s5 + lw s5, 24(t6) + addw s2, s0, s1 + lw s3, 28(a0) + mulw s0, s4, s5 + lw s4, 28(t6) + addw s1, s2, s0 + lw s2, 32(a0) + mulw s5, s3, s4 + lw s4, 32(t6) + addw s0, s1, s5 + lw s3, 36(a0) + lw s6, 36(t6) mulw s5, s2, s4 - lw s2, 84(a1) - addiw s4, s6, -2016 + lw s2, 40(a0) addw s1, s0, s5 - li s6, 1375 - add s0, a7, s4 - sh2add s3, a0, s0 - lw s4, 0(s3) - mulw s5, s2, s4 - lw s2, 88(a1) - slli s4, s6, 6 - addw s0, s1, s5 - lui s6, 22 - add s1, a7, s4 - sh2add s3, a0, s1 - lw s5, 0(s3) + mulw s4, s3, s6 + lw s5, 40(t6) + addw s0, s1, s4 + lw s3, 44(a0) mulw s4, s2, s5 - addiw s2, s6, 1888 + lw s5, 44(t6) addw s1, s0, s4 - add s3, a7, s2 - lw a7, 92(a1) - sh2add s0, a0, s3 - addiw a0, a0, 1 - lw s4, 0(s0) - mulw s2, a7, s4 - li a7, 1000 - addw a1, s1, s2 - sw a1, 0(a6) - bge a0, a7, label1003 - addi a6, a6, 4 - mv a1, t2 - mv s0, zero - mv s1, zero -.p2align 2 -label781: - mul s3, s0, a2 - lw s2, 0(a1) - addiw s0, s0, 16 - add a7, a3, s3 - sh2add s4, a0, a7 - add s6, a7, a2 - sh2add s7, a0, s6 - lw s3, 0(s4) - lw s5, 4(a1) - lw s9, 0(s7) - mulw s8, s2, s3 - add s7, a7, a4 - lw s2, 8(a1) - mulw s6, s5, s9 - sh2add s5, a0, s7 - addw s4, s6, s8 - add s6, a7, a5 - lw s8, 0(s5) - sh2add s5, a0, s6 - add s6, a7, t0 - mulw s7, s2, s8 - addw s3, s4, s7 - lw s4, 12(a1) - lw s8, 0(s5) - sh2add s5, a0, s6 - add s6, a7, t1 - mulw s7, s4, s8 - lw s4, 16(a1) - addw s2, s3, s7 - lw s7, 0(s5) - sh2add s5, a0, s6 - mulw s8, s4, s7 - lw s4, 20(a1) - addw s3, s2, s8 - lw s7, 0(s5) - li s8, 125 - li s5, 375 - mulw s6, s4, s7 - slli s7, s5, 6 - addw s2, s3, s6 - lw s5, 24(a1) - add s3, a7, s7 - sh2add s4, a0, s3 - lw s6, 0(s4) - li s4, 875 - mulw s7, s5, s6 - slli s6, s4, 5 - addw s3, s2, s7 - lw s4, 28(a1) - add s2, a7, s6 - sh2add s5, a0, s2 - lw s6, 0(s5) - mulw s7, s4, s6 - lw s4, 32(a1) - slli s6, s8, 8 - addw s2, s3, s7 - li s8, 1125 - add s3, a7, s6 - sh2add s5, a0, s3 - lw s6, 0(s5) - mulw s7, s4, s6 - lw s4, 36(a1) - slli s6, s8, 5 - addw s3, s2, s7 - li s8, 1375 - add s2, a7, s6 - sh2add s5, a0, s2 - lw s7, 0(s5) - li s5, 625 - mulw s6, s4, s7 - slli s7, s5, 6 - addw s2, s3, s6 - lw s5, 40(a1) - add s3, a7, s7 - sh2add s4, a0, s3 - lw s6, 0(s4) - mulw s7, s5, s6 - lw s5, 44(a1) - slli s6, s8, 5 - addw s3, s2, s7 - li s8, 875 - add s2, a7, s6 - sh2add s4, a0, s2 - lw s7, 0(s4) - li s4, 375 - mulw s6, s5, s7 - slli s7, s4, 7 - addw s2, s3, s6 - lw s4, 48(a1) - add s3, a7, s7 - sh2add s5, a0, s3 - lw s6, 0(s5) - li s5, 1625 - mulw s7, s4, s6 - slli s6, s5, 5 - addw s3, s2, s7 - lw s5, 52(a1) - add s4, a7, s6 - sh2add s2, a0, s4 - lw s7, 0(s2) - mulw s6, s5, s7 - slli s7, s8, 6 - addw s4, s3, s6 - li s8, 1875 - add s2, a7, s7 - lw s3, 56(a1) - sh2add s5, a0, s2 - lw s6, 0(s5) - mulw s7, s3, s6 - lw s3, 60(a1) - slli s6, s8, 5 - addw s2, s4, s7 - add s5, a7, s6 - sh2add s4, a0, s5 - lw s7, 0(s4) - mulw s6, s3, s7 - addw s5, s2, s6 - li s2, 992 - addw s1, s1, s5 - bge s0, s2, label786 - addi a1, a1, 64 - j label781 + lw s4, 48(a0) + mulw s2, s3, s5 + lw s3, 48(t6) + addw s0, s1, s2 + lw s2, 52(a0) + lw s5, 52(t6) + mulw s6, s4, s3 + lw s3, 56(a0) + addw s1, s0, s6 + mulw s4, s2, s5 + lw s5, 56(t6) + addw s0, s1, s4 + lw s2, 60(a0) + lw s4, 60(t6) + mulw s6, s3, s5 + addw s1, s0, s6 + mulw s3, s2, s4 + addw s0, s1, s3 + addw a7, a7, s0 + bge a6, a1, label1095 + addi a0, a0, 64 + j label1090 .p2align 2 -label1003: - addiw t6, t6, 1 - ble t4, t6, label790 - add t3, t3, a2 - mul a1, t6, a2 - mv a0, zero - mv s0, zero - mv s1, zero - mv a6, t3 - add t2, t5, a1 - mv a1, t2 - j label781 -label790: +label1266: + addiw t4, t4, 1 + ble t1, t4, label1099 + add a4, a4, a3 + mul a0, t4, a3 + mv a2, t2 + mv t5, zero + mv a6, zero + mv a7, zero + add t0, t3, a0 + mv a0, t0 + j label1090 +label1099: ld s0, 0(sp) ld s5, 8(sp) ld s1, 16(sp) ld s6, 24(sp) - ld s3, 32(sp) - ld s4, 40(sp) + ld s4, 32(sp) + ld s3, 40(sp) ld s2, 48(sp) - ld s7, 56(sp) - ld s9, 64(sp) - ld s8, 72(sp) - addi sp, sp, 80 + addi sp, sp, 56 ret .p2align 2 cmmc_parallel_body_2: mv t0, a0 mv a2, a1 addiw a4, a0, 3 -pcrel1192: +pcrel1409: auipc a5, %pcrel_hi(cmmc_parallel_body_payload_2) - ld a3, %pcrel_lo(pcrel1192)(a5) - addi a1, a5, %pcrel_lo(pcrel1192) + ld a3, %pcrel_lo(pcrel1409)(a5) + addi a1, a5, %pcrel_lo(pcrel1409) lw a0, 8(a1) - ble a2, a4, label1090 + ble a2, a4, label1307 addiw t1, t0, 15 addiw a4, a2, -3 addiw a5, a2, -18 - bge t1, a4, label1141 + bge t1, a4, label1358 sh2add a1, t0, a3 - j label1100 + j label1317 .p2align 2 -label1103: +label1320: addi a1, a1, 64 .p2align 2 -label1100: +label1317: sw a0, 0(a1) addiw t0, t0, 16 sw a0, 4(a1) @@ -1547,59 +1664,59 @@ label1100: sw a0, 52(a1) sw a0, 56(a1) sw a0, 60(a1) - bgt a5, t0, label1103 + bgt a5, t0, label1320 mv a1, t0 -label1104: - ble a4, a1, label1090 +label1321: + ble a4, a1, label1307 sh2add a5, a1, a3 -label1108: +label1325: sw a0, 0(a5) addiw a1, a1, 4 sw a0, 4(a5) sw a0, 8(a5) sw a0, 12(a5) - ble a4, a1, label1178 + ble a4, a1, label1395 addi a5, a5, 16 - j label1108 -label1178: + j label1325 +label1395: mv t0, a1 -label1090: - ble a2, t0, label1097 +label1307: + ble a2, t0, label1314 sh2add a1, t0, a3 - j label1093 -label1096: + j label1310 +label1313: addi a1, a1, 4 -label1093: +label1310: addiw t0, t0, 1 sw a0, 0(a1) - bgt a2, t0, label1096 -label1097: + bgt a2, t0, label1313 +label1314: ret -label1141: +label1358: mv a1, t0 mv t0, zero - j label1104 + j label1321 .p2align 2 cmmc_parallel_body_3: mv t0, a0 addiw a5, a0, 3 -pcrel1347: +pcrel1564: auipc a0, %pcrel_hi(cmmc_parallel_body_payload_3) - addi a2, a0, %pcrel_lo(pcrel1347) + addi a2, a0, %pcrel_lo(pcrel1564) ld a3, 8(a2) - ble a1, a5, label1235 + ble a1, a5, label1452 addiw a0, t0, 15 addiw a4, a1, -3 addiw a5, a1, -18 - bge a0, a4, label1242 + bge a0, a4, label1459 sh2add a0, t0, a3 mv t1, zero - j label1210 + j label1427 .p2align 2 -label1214: +label1431: addi a0, a0, 64 .p2align 2 -label1210: +label1427: lw t4, 0(a0) addiw t0, t0, 16 lw t5, 4(a0) @@ -1633,17 +1750,17 @@ label1210: lw t4, 60(a0) addw t2, t3, t6 addw t1, t2, t4 - bgt a5, t0, label1214 + bgt a5, t0, label1431 mv a5, t0 mv t2, t1 -label1195: - ble a4, a5, label1246 +label1412: + ble a4, a5, label1463 sh2add a0, a5, a3 mv t0, t2 - j label1204 -label1208: + j label1421 +label1425: addi a0, a0, 16 -label1204: +label1421: lw t1, 0(a0) addiw a5, a5, 4 lw t4, 4(a0) @@ -1653,39 +1770,39 @@ label1204: lw t3, 12(a0) addw t1, t2, t5 addw t0, t1, t3 - bgt a4, a5, label1208 + bgt a4, a5, label1425 mv a0, t0 mv a4, t0 mv t0, a5 -label1215: - ble a1, t0, label1324 +label1432: + ble a1, t0, label1541 sh2add a0, t0, a3 mv a3, a4 - j label1222 -label1226: + j label1439 +label1443: addi a0, a0, 4 -label1222: +label1439: lw a5, 0(a0) addiw t0, t0, 1 addw a3, a3, a5 - bgt a1, t0, label1226 -label1219: + bgt a1, t0, label1443 +label1436: amoadd.w.aqrl a1, a3, (a2) ret -label1246: +label1463: mv a0, t1 mv a4, t1 - j label1215 -label1242: + j label1432 +label1459: mv a5, t0 mv t2, zero mv t1, zero mv t0, zero - j label1195 -label1324: + j label1412 +label1541: mv a3, a0 - j label1219 -label1235: + j label1436 +label1452: mv a4, zero mv a0, zero - j label1215 + j label1432 diff --git a/tests/SysY2022/performance/matmul3.sy.ir b/tests/SysY2022/performance/matmul3.sy.ir index 4bc2f50e6..f140ebcc6 100644 --- a/tests/SysY2022/performance/matmul3.sy.ir +++ b/tests/SysY2022/performance/matmul3.sy.ir @@ -3,7 +3,7 @@ internal func @putint(i32) -> void { NoMemoryRead NoMemoryWrite }; internal func @starttime(i32) -> void { NoMemoryRead NoMemoryWrite }; internal func @stoptime(i32) -> void { NoMemoryRead NoMemoryWrite }; internal [1000 * [1000 * i32]]* @a, align 8; -internal [1000 * [1000 * i32]]* @b, align 8 { Flexible }; +internal [1000 * [1000 * i32]]* @b, align 8 { Flexible Transposed }; internal [1000 * [1000 * i32]]* @c, align 8 { Flexible }; func @main() -> i32 { NoRecurse Entry } { ^entry: @@ -550,743 +550,823 @@ func @main() -> i32 { NoRecurse Entry } { internal func @cmmcParallelFor(i32, i32, i8*) -> void { NoRecurse }; internal func @cmmc_parallel_body_0(i32 %0, i32 %1) -> void { NoRecurse ParallelBody AlignedParallelBody } { ^b: - [1000 * [1000 * i32]]* %2 = ptrcast [1000 * [1000 * i32]]* @b to [1000 * [1000 * i32]]*; - [1000 * [1000 * i32]]* %3 = ptrcast [1000 * [1000 * i32]]* @a to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %2 = ptrcast [1000 * [1000 * i32]]* @a to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %3 = ptrcast [1000 * [1000 * i32]]* @b to [1000 * [1000 * i32]]*; ubr ^b1; ^b1: - i32 %4 = phi [^b, i32 %0] [^while.body1, i32 %425]; - [1000 * i32]* %5 = getelementptr &([1000 * [1000 * i32]]* %2)[i64 0][i32 %4]; + i32 %4 = phi [^b, i32 %0] [^while.body1, i32 %528]; ubr ^while.body; ^while.body: - i32 %6 = phi [^b1, i32 0] [^while.body, i32 %263]; - [1000 * i32]* %7 = getelementptr &([1000 * [1000 * i32]]* %3)[i64 0][i32 %6]; - i32* %8 = getelementptr &([1000 * i32]* %7)[i64 0][i32 %4]; - i32 %9 = load i32* %8; - i32* %10 = getelementptr &([1000 * i32]* %5)[i64 0][i32 %6]; - store i32* %10 with i32 %9; - [1000 * i32]* %11 = getelementptr &([1000 * i32]* %7)[i64 1]; + i32 %5 = phi [^b1, i32 0] [^while.body, i32 %326]; + [1000 * i32]* %6 = getelementptr &([1000 * [1000 * i32]]* %2)[i64 0][i32 %5]; + i32* %7 = getelementptr &([1000 * i32]* %6)[i64 0][i32 %4]; + i32 %8 = load i32* %7; + [1000 * i32]* %9 = getelementptr &([1000 * [1000 * i32]]* %3)[i64 0][i32 %5]; + i32* %10 = getelementptr &([1000 * i32]* %9)[i64 0][i32 %4]; + store i32* %10 with i32 %8; + [1000 * i32]* %11 = getelementptr &([1000 * i32]* %6)[i64 1]; i32* %12 = getelementptr &([1000 * i32]* %11)[i64 0][i32 %4]; i32 %13 = load i32* %12; - i32* %14 = getelementptr &(i32* %10)[i64 1]; - store i32* %14 with i32 %13; - [1000 * i32]* %15 = getelementptr &([1000 * i32]* %7)[i64 2]; - i32* %16 = getelementptr &([1000 * i32]* %15)[i64 0][i32 %4]; - i32 %17 = load i32* %16; - i32* %18 = getelementptr &(i32* %10)[i64 2]; - store i32* %18 with i32 %17; - [1000 * i32]* %19 = getelementptr &([1000 * i32]* %7)[i64 3]; + [1000 * i32]* %14 = getelementptr &([1000 * i32]* %9)[i64 1]; + i32* %15 = getelementptr &([1000 * i32]* %14)[i64 0][i32 %4]; + store i32* %15 with i32 %13; + [1000 * i32]* %16 = getelementptr &([1000 * i32]* %6)[i64 2]; + i32* %17 = getelementptr &([1000 * i32]* %16)[i64 0][i32 %4]; + i32 %18 = load i32* %17; + [1000 * i32]* %19 = getelementptr &([1000 * i32]* %9)[i64 2]; i32* %20 = getelementptr &([1000 * i32]* %19)[i64 0][i32 %4]; - i32 %21 = load i32* %20; - i32* %22 = getelementptr &(i32* %10)[i64 3]; - store i32* %22 with i32 %21; - [1000 * i32]* %23 = getelementptr &([1000 * i32]* %7)[i64 4]; - i32* %24 = getelementptr &([1000 * i32]* %23)[i64 0][i32 %4]; - i32 %25 = load i32* %24; - i32* %26 = getelementptr &(i32* %10)[i64 4]; - store i32* %26 with i32 %25; - [1000 * i32]* %27 = getelementptr &([1000 * i32]* %7)[i64 5]; - i32* %28 = getelementptr &([1000 * i32]* %27)[i64 0][i32 %4]; - i32 %29 = load i32* %28; - i32* %30 = getelementptr &(i32* %10)[i64 5]; - store i32* %30 with i32 %29; - [1000 * i32]* %31 = getelementptr &([1000 * i32]* %7)[i64 6]; + store i32* %20 with i32 %18; + [1000 * i32]* %21 = getelementptr &([1000 * i32]* %6)[i64 3]; + i32* %22 = getelementptr &([1000 * i32]* %21)[i64 0][i32 %4]; + i32 %23 = load i32* %22; + [1000 * i32]* %24 = getelementptr &([1000 * i32]* %9)[i64 3]; + i32* %25 = getelementptr &([1000 * i32]* %24)[i64 0][i32 %4]; + store i32* %25 with i32 %23; + [1000 * i32]* %26 = getelementptr &([1000 * i32]* %6)[i64 4]; + i32* %27 = getelementptr &([1000 * i32]* %26)[i64 0][i32 %4]; + i32 %28 = load i32* %27; + [1000 * i32]* %29 = getelementptr &([1000 * i32]* %9)[i64 4]; + i32* %30 = getelementptr &([1000 * i32]* %29)[i64 0][i32 %4]; + store i32* %30 with i32 %28; + [1000 * i32]* %31 = getelementptr &([1000 * i32]* %6)[i64 5]; i32* %32 = getelementptr &([1000 * i32]* %31)[i64 0][i32 %4]; i32 %33 = load i32* %32; - i32* %34 = getelementptr &(i32* %10)[i64 6]; - store i32* %34 with i32 %33; - [1000 * i32]* %35 = getelementptr &([1000 * i32]* %7)[i64 7]; - i32* %36 = getelementptr &([1000 * i32]* %35)[i64 0][i32 %4]; - i32 %37 = load i32* %36; - i32* %38 = getelementptr &(i32* %10)[i64 7]; - store i32* %38 with i32 %37; - [1000 * i32]* %39 = getelementptr &([1000 * i32]* %7)[i64 8]; + [1000 * i32]* %34 = getelementptr &([1000 * i32]* %9)[i64 5]; + i32* %35 = getelementptr &([1000 * i32]* %34)[i64 0][i32 %4]; + store i32* %35 with i32 %33; + [1000 * i32]* %36 = getelementptr &([1000 * i32]* %6)[i64 6]; + i32* %37 = getelementptr &([1000 * i32]* %36)[i64 0][i32 %4]; + i32 %38 = load i32* %37; + [1000 * i32]* %39 = getelementptr &([1000 * i32]* %9)[i64 6]; i32* %40 = getelementptr &([1000 * i32]* %39)[i64 0][i32 %4]; - i32 %41 = load i32* %40; - i32* %42 = getelementptr &(i32* %10)[i64 8]; - store i32* %42 with i32 %41; - [1000 * i32]* %43 = getelementptr &([1000 * i32]* %7)[i64 9]; - i32* %44 = getelementptr &([1000 * i32]* %43)[i64 0][i32 %4]; - i32 %45 = load i32* %44; - i32* %46 = getelementptr &(i32* %10)[i64 9]; - store i32* %46 with i32 %45; - [1000 * i32]* %47 = getelementptr &([1000 * i32]* %7)[i64 10]; - i32* %48 = getelementptr &([1000 * i32]* %47)[i64 0][i32 %4]; - i32 %49 = load i32* %48; - i32* %50 = getelementptr &(i32* %10)[i64 10]; - store i32* %50 with i32 %49; - [1000 * i32]* %51 = getelementptr &([1000 * i32]* %7)[i64 11]; + store i32* %40 with i32 %38; + [1000 * i32]* %41 = getelementptr &([1000 * i32]* %6)[i64 7]; + i32* %42 = getelementptr &([1000 * i32]* %41)[i64 0][i32 %4]; + i32 %43 = load i32* %42; + [1000 * i32]* %44 = getelementptr &([1000 * i32]* %9)[i64 7]; + i32* %45 = getelementptr &([1000 * i32]* %44)[i64 0][i32 %4]; + store i32* %45 with i32 %43; + [1000 * i32]* %46 = getelementptr &([1000 * i32]* %6)[i64 8]; + i32* %47 = getelementptr &([1000 * i32]* %46)[i64 0][i32 %4]; + i32 %48 = load i32* %47; + [1000 * i32]* %49 = getelementptr &([1000 * i32]* %9)[i64 8]; + i32* %50 = getelementptr &([1000 * i32]* %49)[i64 0][i32 %4]; + store i32* %50 with i32 %48; + [1000 * i32]* %51 = getelementptr &([1000 * i32]* %6)[i64 9]; i32* %52 = getelementptr &([1000 * i32]* %51)[i64 0][i32 %4]; i32 %53 = load i32* %52; - i32* %54 = getelementptr &(i32* %10)[i64 11]; - store i32* %54 with i32 %53; - [1000 * i32]* %55 = getelementptr &([1000 * i32]* %7)[i64 12]; - i32* %56 = getelementptr &([1000 * i32]* %55)[i64 0][i32 %4]; - i32 %57 = load i32* %56; - i32* %58 = getelementptr &(i32* %10)[i64 12]; - store i32* %58 with i32 %57; - [1000 * i32]* %59 = getelementptr &([1000 * i32]* %7)[i64 13]; + [1000 * i32]* %54 = getelementptr &([1000 * i32]* %9)[i64 9]; + i32* %55 = getelementptr &([1000 * i32]* %54)[i64 0][i32 %4]; + store i32* %55 with i32 %53; + [1000 * i32]* %56 = getelementptr &([1000 * i32]* %6)[i64 10]; + i32* %57 = getelementptr &([1000 * i32]* %56)[i64 0][i32 %4]; + i32 %58 = load i32* %57; + [1000 * i32]* %59 = getelementptr &([1000 * i32]* %9)[i64 10]; i32* %60 = getelementptr &([1000 * i32]* %59)[i64 0][i32 %4]; - i32 %61 = load i32* %60; - i32* %62 = getelementptr &(i32* %10)[i64 13]; - store i32* %62 with i32 %61; - [1000 * i32]* %63 = getelementptr &([1000 * i32]* %7)[i64 14]; - i32* %64 = getelementptr &([1000 * i32]* %63)[i64 0][i32 %4]; - i32 %65 = load i32* %64; - i32* %66 = getelementptr &(i32* %10)[i64 14]; - store i32* %66 with i32 %65; - [1000 * i32]* %67 = getelementptr &([1000 * i32]* %7)[i64 15]; - i32* %68 = getelementptr &([1000 * i32]* %67)[i64 0][i32 %4]; - i32 %69 = load i32* %68; - i32* %70 = getelementptr &(i32* %10)[i64 15]; - store i32* %70 with i32 %69; - [1000 * i32]* %71 = getelementptr &([1000 * i32]* %7)[i64 16]; + store i32* %60 with i32 %58; + [1000 * i32]* %61 = getelementptr &([1000 * i32]* %6)[i64 11]; + i32* %62 = getelementptr &([1000 * i32]* %61)[i64 0][i32 %4]; + i32 %63 = load i32* %62; + [1000 * i32]* %64 = getelementptr &([1000 * i32]* %9)[i64 11]; + i32* %65 = getelementptr &([1000 * i32]* %64)[i64 0][i32 %4]; + store i32* %65 with i32 %63; + [1000 * i32]* %66 = getelementptr &([1000 * i32]* %6)[i64 12]; + i32* %67 = getelementptr &([1000 * i32]* %66)[i64 0][i32 %4]; + i32 %68 = load i32* %67; + [1000 * i32]* %69 = getelementptr &([1000 * i32]* %9)[i64 12]; + i32* %70 = getelementptr &([1000 * i32]* %69)[i64 0][i32 %4]; + store i32* %70 with i32 %68; + [1000 * i32]* %71 = getelementptr &([1000 * i32]* %6)[i64 13]; i32* %72 = getelementptr &([1000 * i32]* %71)[i64 0][i32 %4]; i32 %73 = load i32* %72; - i32* %74 = getelementptr &(i32* %10)[i64 16]; - store i32* %74 with i32 %73; - [1000 * i32]* %75 = getelementptr &([1000 * i32]* %7)[i64 17]; - i32* %76 = getelementptr &([1000 * i32]* %75)[i64 0][i32 %4]; - i32 %77 = load i32* %76; - i32* %78 = getelementptr &(i32* %10)[i64 17]; - store i32* %78 with i32 %77; - [1000 * i32]* %79 = getelementptr &([1000 * i32]* %7)[i64 18]; + [1000 * i32]* %74 = getelementptr &([1000 * i32]* %9)[i64 13]; + i32* %75 = getelementptr &([1000 * i32]* %74)[i64 0][i32 %4]; + store i32* %75 with i32 %73; + [1000 * i32]* %76 = getelementptr &([1000 * i32]* %6)[i64 14]; + i32* %77 = getelementptr &([1000 * i32]* %76)[i64 0][i32 %4]; + i32 %78 = load i32* %77; + [1000 * i32]* %79 = getelementptr &([1000 * i32]* %9)[i64 14]; i32* %80 = getelementptr &([1000 * i32]* %79)[i64 0][i32 %4]; - i32 %81 = load i32* %80; - i32* %82 = getelementptr &(i32* %10)[i64 18]; - store i32* %82 with i32 %81; - [1000 * i32]* %83 = getelementptr &([1000 * i32]* %7)[i64 19]; - i32* %84 = getelementptr &([1000 * i32]* %83)[i64 0][i32 %4]; - i32 %85 = load i32* %84; - i32* %86 = getelementptr &(i32* %10)[i64 19]; - store i32* %86 with i32 %85; - [1000 * i32]* %87 = getelementptr &([1000 * i32]* %7)[i64 20]; - i32* %88 = getelementptr &([1000 * i32]* %87)[i64 0][i32 %4]; - i32 %89 = load i32* %88; - i32* %90 = getelementptr &(i32* %10)[i64 20]; - store i32* %90 with i32 %89; - [1000 * i32]* %91 = getelementptr &([1000 * i32]* %7)[i64 21]; + store i32* %80 with i32 %78; + [1000 * i32]* %81 = getelementptr &([1000 * i32]* %6)[i64 15]; + i32* %82 = getelementptr &([1000 * i32]* %81)[i64 0][i32 %4]; + i32 %83 = load i32* %82; + [1000 * i32]* %84 = getelementptr &([1000 * i32]* %9)[i64 15]; + i32* %85 = getelementptr &([1000 * i32]* %84)[i64 0][i32 %4]; + store i32* %85 with i32 %83; + [1000 * i32]* %86 = getelementptr &([1000 * i32]* %6)[i64 16]; + i32* %87 = getelementptr &([1000 * i32]* %86)[i64 0][i32 %4]; + i32 %88 = load i32* %87; + [1000 * i32]* %89 = getelementptr &([1000 * i32]* %9)[i64 16]; + i32* %90 = getelementptr &([1000 * i32]* %89)[i64 0][i32 %4]; + store i32* %90 with i32 %88; + [1000 * i32]* %91 = getelementptr &([1000 * i32]* %6)[i64 17]; i32* %92 = getelementptr &([1000 * i32]* %91)[i64 0][i32 %4]; i32 %93 = load i32* %92; - i32* %94 = getelementptr &(i32* %10)[i64 21]; - store i32* %94 with i32 %93; - [1000 * i32]* %95 = getelementptr &([1000 * i32]* %7)[i64 22]; - i32* %96 = getelementptr &([1000 * i32]* %95)[i64 0][i32 %4]; - i32 %97 = load i32* %96; - i32* %98 = getelementptr &(i32* %10)[i64 22]; - store i32* %98 with i32 %97; - [1000 * i32]* %99 = getelementptr &([1000 * i32]* %7)[i64 23]; + [1000 * i32]* %94 = getelementptr &([1000 * i32]* %9)[i64 17]; + i32* %95 = getelementptr &([1000 * i32]* %94)[i64 0][i32 %4]; + store i32* %95 with i32 %93; + [1000 * i32]* %96 = getelementptr &([1000 * i32]* %6)[i64 18]; + i32* %97 = getelementptr &([1000 * i32]* %96)[i64 0][i32 %4]; + i32 %98 = load i32* %97; + [1000 * i32]* %99 = getelementptr &([1000 * i32]* %9)[i64 18]; i32* %100 = getelementptr &([1000 * i32]* %99)[i64 0][i32 %4]; - i32 %101 = load i32* %100; - i32* %102 = getelementptr &(i32* %10)[i64 23]; - store i32* %102 with i32 %101; - [1000 * i32]* %103 = getelementptr &([1000 * i32]* %7)[i64 24]; - i32* %104 = getelementptr &([1000 * i32]* %103)[i64 0][i32 %4]; - i32 %105 = load i32* %104; - i32* %106 = getelementptr &(i32* %10)[i64 24]; - store i32* %106 with i32 %105; - [1000 * i32]* %107 = getelementptr &([1000 * i32]* %7)[i64 25]; - i32* %108 = getelementptr &([1000 * i32]* %107)[i64 0][i32 %4]; - i32 %109 = load i32* %108; - i32* %110 = getelementptr &(i32* %10)[i64 25]; - store i32* %110 with i32 %109; - [1000 * i32]* %111 = getelementptr &([1000 * i32]* %7)[i64 26]; + store i32* %100 with i32 %98; + [1000 * i32]* %101 = getelementptr &([1000 * i32]* %6)[i64 19]; + i32* %102 = getelementptr &([1000 * i32]* %101)[i64 0][i32 %4]; + i32 %103 = load i32* %102; + [1000 * i32]* %104 = getelementptr &([1000 * i32]* %9)[i64 19]; + i32* %105 = getelementptr &([1000 * i32]* %104)[i64 0][i32 %4]; + store i32* %105 with i32 %103; + [1000 * i32]* %106 = getelementptr &([1000 * i32]* %6)[i64 20]; + i32* %107 = getelementptr &([1000 * i32]* %106)[i64 0][i32 %4]; + i32 %108 = load i32* %107; + [1000 * i32]* %109 = getelementptr &([1000 * i32]* %9)[i64 20]; + i32* %110 = getelementptr &([1000 * i32]* %109)[i64 0][i32 %4]; + store i32* %110 with i32 %108; + [1000 * i32]* %111 = getelementptr &([1000 * i32]* %6)[i64 21]; i32* %112 = getelementptr &([1000 * i32]* %111)[i64 0][i32 %4]; i32 %113 = load i32* %112; - i32* %114 = getelementptr &(i32* %10)[i64 26]; - store i32* %114 with i32 %113; - [1000 * i32]* %115 = getelementptr &([1000 * i32]* %7)[i64 27]; - i32* %116 = getelementptr &([1000 * i32]* %115)[i64 0][i32 %4]; - i32 %117 = load i32* %116; - i32* %118 = getelementptr &(i32* %10)[i64 27]; - store i32* %118 with i32 %117; - [1000 * i32]* %119 = getelementptr &([1000 * i32]* %7)[i64 28]; + [1000 * i32]* %114 = getelementptr &([1000 * i32]* %9)[i64 21]; + i32* %115 = getelementptr &([1000 * i32]* %114)[i64 0][i32 %4]; + store i32* %115 with i32 %113; + [1000 * i32]* %116 = getelementptr &([1000 * i32]* %6)[i64 22]; + i32* %117 = getelementptr &([1000 * i32]* %116)[i64 0][i32 %4]; + i32 %118 = load i32* %117; + [1000 * i32]* %119 = getelementptr &([1000 * i32]* %9)[i64 22]; i32* %120 = getelementptr &([1000 * i32]* %119)[i64 0][i32 %4]; - i32 %121 = load i32* %120; - i32* %122 = getelementptr &(i32* %10)[i64 28]; - store i32* %122 with i32 %121; - [1000 * i32]* %123 = getelementptr &([1000 * i32]* %7)[i64 29]; - i32* %124 = getelementptr &([1000 * i32]* %123)[i64 0][i32 %4]; - i32 %125 = load i32* %124; - i32* %126 = getelementptr &(i32* %10)[i64 29]; - store i32* %126 with i32 %125; - [1000 * i32]* %127 = getelementptr &([1000 * i32]* %7)[i64 30]; - i32* %128 = getelementptr &([1000 * i32]* %127)[i64 0][i32 %4]; - i32 %129 = load i32* %128; - i32* %130 = getelementptr &(i32* %10)[i64 30]; - store i32* %130 with i32 %129; - [1000 * i32]* %131 = getelementptr &([1000 * i32]* %7)[i64 31]; + store i32* %120 with i32 %118; + [1000 * i32]* %121 = getelementptr &([1000 * i32]* %6)[i64 23]; + i32* %122 = getelementptr &([1000 * i32]* %121)[i64 0][i32 %4]; + i32 %123 = load i32* %122; + [1000 * i32]* %124 = getelementptr &([1000 * i32]* %9)[i64 23]; + i32* %125 = getelementptr &([1000 * i32]* %124)[i64 0][i32 %4]; + store i32* %125 with i32 %123; + [1000 * i32]* %126 = getelementptr &([1000 * i32]* %6)[i64 24]; + i32* %127 = getelementptr &([1000 * i32]* %126)[i64 0][i32 %4]; + i32 %128 = load i32* %127; + [1000 * i32]* %129 = getelementptr &([1000 * i32]* %9)[i64 24]; + i32* %130 = getelementptr &([1000 * i32]* %129)[i64 0][i32 %4]; + store i32* %130 with i32 %128; + [1000 * i32]* %131 = getelementptr &([1000 * i32]* %6)[i64 25]; i32* %132 = getelementptr &([1000 * i32]* %131)[i64 0][i32 %4]; i32 %133 = load i32* %132; - i32* %134 = getelementptr &(i32* %10)[i64 31]; - store i32* %134 with i32 %133; - [1000 * i32]* %135 = getelementptr &([1000 * i32]* %7)[i64 32]; - i32* %136 = getelementptr &([1000 * i32]* %135)[i64 0][i32 %4]; - i32 %137 = load i32* %136; - i32* %138 = getelementptr &(i32* %10)[i64 32]; - store i32* %138 with i32 %137; - [1000 * i32]* %139 = getelementptr &([1000 * i32]* %7)[i64 33]; + [1000 * i32]* %134 = getelementptr &([1000 * i32]* %9)[i64 25]; + i32* %135 = getelementptr &([1000 * i32]* %134)[i64 0][i32 %4]; + store i32* %135 with i32 %133; + [1000 * i32]* %136 = getelementptr &([1000 * i32]* %6)[i64 26]; + i32* %137 = getelementptr &([1000 * i32]* %136)[i64 0][i32 %4]; + i32 %138 = load i32* %137; + [1000 * i32]* %139 = getelementptr &([1000 * i32]* %9)[i64 26]; i32* %140 = getelementptr &([1000 * i32]* %139)[i64 0][i32 %4]; - i32 %141 = load i32* %140; - i32* %142 = getelementptr &(i32* %10)[i64 33]; - store i32* %142 with i32 %141; - [1000 * i32]* %143 = getelementptr &([1000 * i32]* %7)[i64 34]; - i32* %144 = getelementptr &([1000 * i32]* %143)[i64 0][i32 %4]; - i32 %145 = load i32* %144; - i32* %146 = getelementptr &(i32* %10)[i64 34]; - store i32* %146 with i32 %145; - [1000 * i32]* %147 = getelementptr &([1000 * i32]* %7)[i64 35]; - i32* %148 = getelementptr &([1000 * i32]* %147)[i64 0][i32 %4]; - i32 %149 = load i32* %148; - i32* %150 = getelementptr &(i32* %10)[i64 35]; - store i32* %150 with i32 %149; - [1000 * i32]* %151 = getelementptr &([1000 * i32]* %7)[i64 36]; + store i32* %140 with i32 %138; + [1000 * i32]* %141 = getelementptr &([1000 * i32]* %6)[i64 27]; + i32* %142 = getelementptr &([1000 * i32]* %141)[i64 0][i32 %4]; + i32 %143 = load i32* %142; + [1000 * i32]* %144 = getelementptr &([1000 * i32]* %9)[i64 27]; + i32* %145 = getelementptr &([1000 * i32]* %144)[i64 0][i32 %4]; + store i32* %145 with i32 %143; + [1000 * i32]* %146 = getelementptr &([1000 * i32]* %6)[i64 28]; + i32* %147 = getelementptr &([1000 * i32]* %146)[i64 0][i32 %4]; + i32 %148 = load i32* %147; + [1000 * i32]* %149 = getelementptr &([1000 * i32]* %9)[i64 28]; + i32* %150 = getelementptr &([1000 * i32]* %149)[i64 0][i32 %4]; + store i32* %150 with i32 %148; + [1000 * i32]* %151 = getelementptr &([1000 * i32]* %6)[i64 29]; i32* %152 = getelementptr &([1000 * i32]* %151)[i64 0][i32 %4]; i32 %153 = load i32* %152; - i32* %154 = getelementptr &(i32* %10)[i64 36]; - store i32* %154 with i32 %153; - [1000 * i32]* %155 = getelementptr &([1000 * i32]* %7)[i64 37]; - i32* %156 = getelementptr &([1000 * i32]* %155)[i64 0][i32 %4]; - i32 %157 = load i32* %156; - i32* %158 = getelementptr &(i32* %10)[i64 37]; - store i32* %158 with i32 %157; - [1000 * i32]* %159 = getelementptr &([1000 * i32]* %7)[i64 38]; + [1000 * i32]* %154 = getelementptr &([1000 * i32]* %9)[i64 29]; + i32* %155 = getelementptr &([1000 * i32]* %154)[i64 0][i32 %4]; + store i32* %155 with i32 %153; + [1000 * i32]* %156 = getelementptr &([1000 * i32]* %6)[i64 30]; + i32* %157 = getelementptr &([1000 * i32]* %156)[i64 0][i32 %4]; + i32 %158 = load i32* %157; + [1000 * i32]* %159 = getelementptr &([1000 * i32]* %9)[i64 30]; i32* %160 = getelementptr &([1000 * i32]* %159)[i64 0][i32 %4]; - i32 %161 = load i32* %160; - i32* %162 = getelementptr &(i32* %10)[i64 38]; - store i32* %162 with i32 %161; - [1000 * i32]* %163 = getelementptr &([1000 * i32]* %7)[i64 39]; - i32* %164 = getelementptr &([1000 * i32]* %163)[i64 0][i32 %4]; - i32 %165 = load i32* %164; - i32* %166 = getelementptr &(i32* %10)[i64 39]; - store i32* %166 with i32 %165; - [1000 * i32]* %167 = getelementptr &([1000 * i32]* %7)[i64 40]; - i32* %168 = getelementptr &([1000 * i32]* %167)[i64 0][i32 %4]; - i32 %169 = load i32* %168; - i32* %170 = getelementptr &(i32* %10)[i64 40]; - store i32* %170 with i32 %169; - [1000 * i32]* %171 = getelementptr &([1000 * i32]* %7)[i64 41]; + store i32* %160 with i32 %158; + [1000 * i32]* %161 = getelementptr &([1000 * i32]* %6)[i64 31]; + i32* %162 = getelementptr &([1000 * i32]* %161)[i64 0][i32 %4]; + i32 %163 = load i32* %162; + [1000 * i32]* %164 = getelementptr &([1000 * i32]* %9)[i64 31]; + i32* %165 = getelementptr &([1000 * i32]* %164)[i64 0][i32 %4]; + store i32* %165 with i32 %163; + [1000 * i32]* %166 = getelementptr &([1000 * i32]* %6)[i64 32]; + i32* %167 = getelementptr &([1000 * i32]* %166)[i64 0][i32 %4]; + i32 %168 = load i32* %167; + [1000 * i32]* %169 = getelementptr &([1000 * i32]* %9)[i64 32]; + i32* %170 = getelementptr &([1000 * i32]* %169)[i64 0][i32 %4]; + store i32* %170 with i32 %168; + [1000 * i32]* %171 = getelementptr &([1000 * i32]* %6)[i64 33]; i32* %172 = getelementptr &([1000 * i32]* %171)[i64 0][i32 %4]; i32 %173 = load i32* %172; - i32* %174 = getelementptr &(i32* %10)[i64 41]; - store i32* %174 with i32 %173; - [1000 * i32]* %175 = getelementptr &([1000 * i32]* %7)[i64 42]; - i32* %176 = getelementptr &([1000 * i32]* %175)[i64 0][i32 %4]; - i32 %177 = load i32* %176; - i32* %178 = getelementptr &(i32* %10)[i64 42]; - store i32* %178 with i32 %177; - [1000 * i32]* %179 = getelementptr &([1000 * i32]* %7)[i64 43]; + [1000 * i32]* %174 = getelementptr &([1000 * i32]* %9)[i64 33]; + i32* %175 = getelementptr &([1000 * i32]* %174)[i64 0][i32 %4]; + store i32* %175 with i32 %173; + [1000 * i32]* %176 = getelementptr &([1000 * i32]* %6)[i64 34]; + i32* %177 = getelementptr &([1000 * i32]* %176)[i64 0][i32 %4]; + i32 %178 = load i32* %177; + [1000 * i32]* %179 = getelementptr &([1000 * i32]* %9)[i64 34]; i32* %180 = getelementptr &([1000 * i32]* %179)[i64 0][i32 %4]; - i32 %181 = load i32* %180; - i32* %182 = getelementptr &(i32* %10)[i64 43]; - store i32* %182 with i32 %181; - [1000 * i32]* %183 = getelementptr &([1000 * i32]* %7)[i64 44]; - i32* %184 = getelementptr &([1000 * i32]* %183)[i64 0][i32 %4]; - i32 %185 = load i32* %184; - i32* %186 = getelementptr &(i32* %10)[i64 44]; - store i32* %186 with i32 %185; - [1000 * i32]* %187 = getelementptr &([1000 * i32]* %7)[i64 45]; - i32* %188 = getelementptr &([1000 * i32]* %187)[i64 0][i32 %4]; - i32 %189 = load i32* %188; - i32* %190 = getelementptr &(i32* %10)[i64 45]; - store i32* %190 with i32 %189; - [1000 * i32]* %191 = getelementptr &([1000 * i32]* %7)[i64 46]; + store i32* %180 with i32 %178; + [1000 * i32]* %181 = getelementptr &([1000 * i32]* %6)[i64 35]; + i32* %182 = getelementptr &([1000 * i32]* %181)[i64 0][i32 %4]; + i32 %183 = load i32* %182; + [1000 * i32]* %184 = getelementptr &([1000 * i32]* %9)[i64 35]; + i32* %185 = getelementptr &([1000 * i32]* %184)[i64 0][i32 %4]; + store i32* %185 with i32 %183; + [1000 * i32]* %186 = getelementptr &([1000 * i32]* %6)[i64 36]; + i32* %187 = getelementptr &([1000 * i32]* %186)[i64 0][i32 %4]; + i32 %188 = load i32* %187; + [1000 * i32]* %189 = getelementptr &([1000 * i32]* %9)[i64 36]; + i32* %190 = getelementptr &([1000 * i32]* %189)[i64 0][i32 %4]; + store i32* %190 with i32 %188; + [1000 * i32]* %191 = getelementptr &([1000 * i32]* %6)[i64 37]; i32* %192 = getelementptr &([1000 * i32]* %191)[i64 0][i32 %4]; i32 %193 = load i32* %192; - i32* %194 = getelementptr &(i32* %10)[i64 46]; - store i32* %194 with i32 %193; - [1000 * i32]* %195 = getelementptr &([1000 * i32]* %7)[i64 47]; - i32* %196 = getelementptr &([1000 * i32]* %195)[i64 0][i32 %4]; - i32 %197 = load i32* %196; - i32* %198 = getelementptr &(i32* %10)[i64 47]; - store i32* %198 with i32 %197; - [1000 * i32]* %199 = getelementptr &([1000 * i32]* %7)[i64 48]; + [1000 * i32]* %194 = getelementptr &([1000 * i32]* %9)[i64 37]; + i32* %195 = getelementptr &([1000 * i32]* %194)[i64 0][i32 %4]; + store i32* %195 with i32 %193; + [1000 * i32]* %196 = getelementptr &([1000 * i32]* %6)[i64 38]; + i32* %197 = getelementptr &([1000 * i32]* %196)[i64 0][i32 %4]; + i32 %198 = load i32* %197; + [1000 * i32]* %199 = getelementptr &([1000 * i32]* %9)[i64 38]; i32* %200 = getelementptr &([1000 * i32]* %199)[i64 0][i32 %4]; - i32 %201 = load i32* %200; - i32* %202 = getelementptr &(i32* %10)[i64 48]; - store i32* %202 with i32 %201; - [1000 * i32]* %203 = getelementptr &([1000 * i32]* %7)[i64 49]; - i32* %204 = getelementptr &([1000 * i32]* %203)[i64 0][i32 %4]; - i32 %205 = load i32* %204; - i32* %206 = getelementptr &(i32* %10)[i64 49]; - store i32* %206 with i32 %205; - [1000 * i32]* %207 = getelementptr &([1000 * i32]* %7)[i64 50]; - i32* %208 = getelementptr &([1000 * i32]* %207)[i64 0][i32 %4]; - i32 %209 = load i32* %208; - i32* %210 = getelementptr &(i32* %10)[i64 50]; - store i32* %210 with i32 %209; - [1000 * i32]* %211 = getelementptr &([1000 * i32]* %7)[i64 51]; + store i32* %200 with i32 %198; + [1000 * i32]* %201 = getelementptr &([1000 * i32]* %6)[i64 39]; + i32* %202 = getelementptr &([1000 * i32]* %201)[i64 0][i32 %4]; + i32 %203 = load i32* %202; + [1000 * i32]* %204 = getelementptr &([1000 * i32]* %9)[i64 39]; + i32* %205 = getelementptr &([1000 * i32]* %204)[i64 0][i32 %4]; + store i32* %205 with i32 %203; + [1000 * i32]* %206 = getelementptr &([1000 * i32]* %6)[i64 40]; + i32* %207 = getelementptr &([1000 * i32]* %206)[i64 0][i32 %4]; + i32 %208 = load i32* %207; + [1000 * i32]* %209 = getelementptr &([1000 * i32]* %9)[i64 40]; + i32* %210 = getelementptr &([1000 * i32]* %209)[i64 0][i32 %4]; + store i32* %210 with i32 %208; + [1000 * i32]* %211 = getelementptr &([1000 * i32]* %6)[i64 41]; i32* %212 = getelementptr &([1000 * i32]* %211)[i64 0][i32 %4]; i32 %213 = load i32* %212; - i32* %214 = getelementptr &(i32* %10)[i64 51]; - store i32* %214 with i32 %213; - [1000 * i32]* %215 = getelementptr &([1000 * i32]* %7)[i64 52]; - i32* %216 = getelementptr &([1000 * i32]* %215)[i64 0][i32 %4]; - i32 %217 = load i32* %216; - i32* %218 = getelementptr &(i32* %10)[i64 52]; - store i32* %218 with i32 %217; - [1000 * i32]* %219 = getelementptr &([1000 * i32]* %7)[i64 53]; + [1000 * i32]* %214 = getelementptr &([1000 * i32]* %9)[i64 41]; + i32* %215 = getelementptr &([1000 * i32]* %214)[i64 0][i32 %4]; + store i32* %215 with i32 %213; + [1000 * i32]* %216 = getelementptr &([1000 * i32]* %6)[i64 42]; + i32* %217 = getelementptr &([1000 * i32]* %216)[i64 0][i32 %4]; + i32 %218 = load i32* %217; + [1000 * i32]* %219 = getelementptr &([1000 * i32]* %9)[i64 42]; i32* %220 = getelementptr &([1000 * i32]* %219)[i64 0][i32 %4]; - i32 %221 = load i32* %220; - i32* %222 = getelementptr &(i32* %10)[i64 53]; - store i32* %222 with i32 %221; - [1000 * i32]* %223 = getelementptr &([1000 * i32]* %7)[i64 54]; - i32* %224 = getelementptr &([1000 * i32]* %223)[i64 0][i32 %4]; - i32 %225 = load i32* %224; - i32* %226 = getelementptr &(i32* %10)[i64 54]; - store i32* %226 with i32 %225; - [1000 * i32]* %227 = getelementptr &([1000 * i32]* %7)[i64 55]; - i32* %228 = getelementptr &([1000 * i32]* %227)[i64 0][i32 %4]; - i32 %229 = load i32* %228; - i32* %230 = getelementptr &(i32* %10)[i64 55]; - store i32* %230 with i32 %229; - [1000 * i32]* %231 = getelementptr &([1000 * i32]* %7)[i64 56]; + store i32* %220 with i32 %218; + [1000 * i32]* %221 = getelementptr &([1000 * i32]* %6)[i64 43]; + i32* %222 = getelementptr &([1000 * i32]* %221)[i64 0][i32 %4]; + i32 %223 = load i32* %222; + [1000 * i32]* %224 = getelementptr &([1000 * i32]* %9)[i64 43]; + i32* %225 = getelementptr &([1000 * i32]* %224)[i64 0][i32 %4]; + store i32* %225 with i32 %223; + [1000 * i32]* %226 = getelementptr &([1000 * i32]* %6)[i64 44]; + i32* %227 = getelementptr &([1000 * i32]* %226)[i64 0][i32 %4]; + i32 %228 = load i32* %227; + [1000 * i32]* %229 = getelementptr &([1000 * i32]* %9)[i64 44]; + i32* %230 = getelementptr &([1000 * i32]* %229)[i64 0][i32 %4]; + store i32* %230 with i32 %228; + [1000 * i32]* %231 = getelementptr &([1000 * i32]* %6)[i64 45]; i32* %232 = getelementptr &([1000 * i32]* %231)[i64 0][i32 %4]; i32 %233 = load i32* %232; - i32* %234 = getelementptr &(i32* %10)[i64 56]; - store i32* %234 with i32 %233; - [1000 * i32]* %235 = getelementptr &([1000 * i32]* %7)[i64 57]; - i32* %236 = getelementptr &([1000 * i32]* %235)[i64 0][i32 %4]; - i32 %237 = load i32* %236; - i32* %238 = getelementptr &(i32* %10)[i64 57]; - store i32* %238 with i32 %237; - [1000 * i32]* %239 = getelementptr &([1000 * i32]* %7)[i64 58]; + [1000 * i32]* %234 = getelementptr &([1000 * i32]* %9)[i64 45]; + i32* %235 = getelementptr &([1000 * i32]* %234)[i64 0][i32 %4]; + store i32* %235 with i32 %233; + [1000 * i32]* %236 = getelementptr &([1000 * i32]* %6)[i64 46]; + i32* %237 = getelementptr &([1000 * i32]* %236)[i64 0][i32 %4]; + i32 %238 = load i32* %237; + [1000 * i32]* %239 = getelementptr &([1000 * i32]* %9)[i64 46]; i32* %240 = getelementptr &([1000 * i32]* %239)[i64 0][i32 %4]; - i32 %241 = load i32* %240; - i32* %242 = getelementptr &(i32* %10)[i64 58]; - store i32* %242 with i32 %241; - [1000 * i32]* %243 = getelementptr &([1000 * i32]* %7)[i64 59]; - i32* %244 = getelementptr &([1000 * i32]* %243)[i64 0][i32 %4]; - i32 %245 = load i32* %244; - i32* %246 = getelementptr &(i32* %10)[i64 59]; - store i32* %246 with i32 %245; - [1000 * i32]* %247 = getelementptr &([1000 * i32]* %7)[i64 60]; - i32* %248 = getelementptr &([1000 * i32]* %247)[i64 0][i32 %4]; - i32 %249 = load i32* %248; - i32* %250 = getelementptr &(i32* %10)[i64 60]; - store i32* %250 with i32 %249; - [1000 * i32]* %251 = getelementptr &([1000 * i32]* %7)[i64 61]; + store i32* %240 with i32 %238; + [1000 * i32]* %241 = getelementptr &([1000 * i32]* %6)[i64 47]; + i32* %242 = getelementptr &([1000 * i32]* %241)[i64 0][i32 %4]; + i32 %243 = load i32* %242; + [1000 * i32]* %244 = getelementptr &([1000 * i32]* %9)[i64 47]; + i32* %245 = getelementptr &([1000 * i32]* %244)[i64 0][i32 %4]; + store i32* %245 with i32 %243; + [1000 * i32]* %246 = getelementptr &([1000 * i32]* %6)[i64 48]; + i32* %247 = getelementptr &([1000 * i32]* %246)[i64 0][i32 %4]; + i32 %248 = load i32* %247; + [1000 * i32]* %249 = getelementptr &([1000 * i32]* %9)[i64 48]; + i32* %250 = getelementptr &([1000 * i32]* %249)[i64 0][i32 %4]; + store i32* %250 with i32 %248; + [1000 * i32]* %251 = getelementptr &([1000 * i32]* %6)[i64 49]; i32* %252 = getelementptr &([1000 * i32]* %251)[i64 0][i32 %4]; i32 %253 = load i32* %252; - i32* %254 = getelementptr &(i32* %10)[i64 61]; - store i32* %254 with i32 %253; - [1000 * i32]* %255 = getelementptr &([1000 * i32]* %7)[i64 62]; - i32* %256 = getelementptr &([1000 * i32]* %255)[i64 0][i32 %4]; - i32 %257 = load i32* %256; - i32* %258 = getelementptr &(i32* %10)[i64 62]; - store i32* %258 with i32 %257; - [1000 * i32]* %259 = getelementptr &([1000 * i32]* %7)[i64 63]; + [1000 * i32]* %254 = getelementptr &([1000 * i32]* %9)[i64 49]; + i32* %255 = getelementptr &([1000 * i32]* %254)[i64 0][i32 %4]; + store i32* %255 with i32 %253; + [1000 * i32]* %256 = getelementptr &([1000 * i32]* %6)[i64 50]; + i32* %257 = getelementptr &([1000 * i32]* %256)[i64 0][i32 %4]; + i32 %258 = load i32* %257; + [1000 * i32]* %259 = getelementptr &([1000 * i32]* %9)[i64 50]; i32* %260 = getelementptr &([1000 * i32]* %259)[i64 0][i32 %4]; - i32 %261 = load i32* %260; - i32* %262 = getelementptr &(i32* %10)[i64 63]; - store i32* %262 with i32 %261; - i32 %263 = add i32 %6, i32 64; - i1 %264 = icmp slt i32 %263, i32 960; - cbr i1 %264(prob = 0.933333), ^while.body, ^while.body1; - ^while.body1: - [1000 * i32]* %265 = getelementptr &([1000 * i32]* %7)[i64 64]; - i32* %266 = getelementptr &([1000 * i32]* %265)[i64 0][i32 %4]; - i32 %267 = load i32* %266; - i32* %268 = getelementptr &(i32* %10)[i64 64]; - store i32* %268 with i32 %267; - [1000 * i32]* %269 = getelementptr &([1000 * i32]* %7)[i64 65]; + store i32* %260 with i32 %258; + [1000 * i32]* %261 = getelementptr &([1000 * i32]* %6)[i64 51]; + i32* %262 = getelementptr &([1000 * i32]* %261)[i64 0][i32 %4]; + i32 %263 = load i32* %262; + [1000 * i32]* %264 = getelementptr &([1000 * i32]* %9)[i64 51]; + i32* %265 = getelementptr &([1000 * i32]* %264)[i64 0][i32 %4]; + store i32* %265 with i32 %263; + [1000 * i32]* %266 = getelementptr &([1000 * i32]* %6)[i64 52]; + i32* %267 = getelementptr &([1000 * i32]* %266)[i64 0][i32 %4]; + i32 %268 = load i32* %267; + [1000 * i32]* %269 = getelementptr &([1000 * i32]* %9)[i64 52]; i32* %270 = getelementptr &([1000 * i32]* %269)[i64 0][i32 %4]; - i32 %271 = load i32* %270; - i32* %272 = getelementptr &(i32* %10)[i64 65]; - store i32* %272 with i32 %271; - [1000 * i32]* %273 = getelementptr &([1000 * i32]* %7)[i64 66]; - i32* %274 = getelementptr &([1000 * i32]* %273)[i64 0][i32 %4]; - i32 %275 = load i32* %274; - i32* %276 = getelementptr &(i32* %10)[i64 66]; - store i32* %276 with i32 %275; - [1000 * i32]* %277 = getelementptr &([1000 * i32]* %7)[i64 67]; - i32* %278 = getelementptr &([1000 * i32]* %277)[i64 0][i32 %4]; - i32 %279 = load i32* %278; - i32* %280 = getelementptr &(i32* %10)[i64 67]; - store i32* %280 with i32 %279; - [1000 * i32]* %281 = getelementptr &([1000 * i32]* %7)[i64 68]; + store i32* %270 with i32 %268; + [1000 * i32]* %271 = getelementptr &([1000 * i32]* %6)[i64 53]; + i32* %272 = getelementptr &([1000 * i32]* %271)[i64 0][i32 %4]; + i32 %273 = load i32* %272; + [1000 * i32]* %274 = getelementptr &([1000 * i32]* %9)[i64 53]; + i32* %275 = getelementptr &([1000 * i32]* %274)[i64 0][i32 %4]; + store i32* %275 with i32 %273; + [1000 * i32]* %276 = getelementptr &([1000 * i32]* %6)[i64 54]; + i32* %277 = getelementptr &([1000 * i32]* %276)[i64 0][i32 %4]; + i32 %278 = load i32* %277; + [1000 * i32]* %279 = getelementptr &([1000 * i32]* %9)[i64 54]; + i32* %280 = getelementptr &([1000 * i32]* %279)[i64 0][i32 %4]; + store i32* %280 with i32 %278; + [1000 * i32]* %281 = getelementptr &([1000 * i32]* %6)[i64 55]; i32* %282 = getelementptr &([1000 * i32]* %281)[i64 0][i32 %4]; i32 %283 = load i32* %282; - i32* %284 = getelementptr &(i32* %10)[i64 68]; - store i32* %284 with i32 %283; - [1000 * i32]* %285 = getelementptr &([1000 * i32]* %7)[i64 69]; - i32* %286 = getelementptr &([1000 * i32]* %285)[i64 0][i32 %4]; - i32 %287 = load i32* %286; - i32* %288 = getelementptr &(i32* %10)[i64 69]; - store i32* %288 with i32 %287; - [1000 * i32]* %289 = getelementptr &([1000 * i32]* %7)[i64 70]; + [1000 * i32]* %284 = getelementptr &([1000 * i32]* %9)[i64 55]; + i32* %285 = getelementptr &([1000 * i32]* %284)[i64 0][i32 %4]; + store i32* %285 with i32 %283; + [1000 * i32]* %286 = getelementptr &([1000 * i32]* %6)[i64 56]; + i32* %287 = getelementptr &([1000 * i32]* %286)[i64 0][i32 %4]; + i32 %288 = load i32* %287; + [1000 * i32]* %289 = getelementptr &([1000 * i32]* %9)[i64 56]; i32* %290 = getelementptr &([1000 * i32]* %289)[i64 0][i32 %4]; - i32 %291 = load i32* %290; - i32* %292 = getelementptr &(i32* %10)[i64 70]; - store i32* %292 with i32 %291; - [1000 * i32]* %293 = getelementptr &([1000 * i32]* %7)[i64 71]; - i32* %294 = getelementptr &([1000 * i32]* %293)[i64 0][i32 %4]; - i32 %295 = load i32* %294; - i32* %296 = getelementptr &(i32* %10)[i64 71]; - store i32* %296 with i32 %295; - [1000 * i32]* %297 = getelementptr &([1000 * i32]* %7)[i64 72]; - i32* %298 = getelementptr &([1000 * i32]* %297)[i64 0][i32 %4]; - i32 %299 = load i32* %298; - i32* %300 = getelementptr &(i32* %10)[i64 72]; - store i32* %300 with i32 %299; - [1000 * i32]* %301 = getelementptr &([1000 * i32]* %7)[i64 73]; + store i32* %290 with i32 %288; + [1000 * i32]* %291 = getelementptr &([1000 * i32]* %6)[i64 57]; + i32* %292 = getelementptr &([1000 * i32]* %291)[i64 0][i32 %4]; + i32 %293 = load i32* %292; + [1000 * i32]* %294 = getelementptr &([1000 * i32]* %9)[i64 57]; + i32* %295 = getelementptr &([1000 * i32]* %294)[i64 0][i32 %4]; + store i32* %295 with i32 %293; + [1000 * i32]* %296 = getelementptr &([1000 * i32]* %6)[i64 58]; + i32* %297 = getelementptr &([1000 * i32]* %296)[i64 0][i32 %4]; + i32 %298 = load i32* %297; + [1000 * i32]* %299 = getelementptr &([1000 * i32]* %9)[i64 58]; + i32* %300 = getelementptr &([1000 * i32]* %299)[i64 0][i32 %4]; + store i32* %300 with i32 %298; + [1000 * i32]* %301 = getelementptr &([1000 * i32]* %6)[i64 59]; i32* %302 = getelementptr &([1000 * i32]* %301)[i64 0][i32 %4]; i32 %303 = load i32* %302; - i32* %304 = getelementptr &(i32* %10)[i64 73]; - store i32* %304 with i32 %303; - [1000 * i32]* %305 = getelementptr &([1000 * i32]* %7)[i64 74]; - i32* %306 = getelementptr &([1000 * i32]* %305)[i64 0][i32 %4]; - i32 %307 = load i32* %306; - i32* %308 = getelementptr &(i32* %10)[i64 74]; - store i32* %308 with i32 %307; - [1000 * i32]* %309 = getelementptr &([1000 * i32]* %7)[i64 75]; + [1000 * i32]* %304 = getelementptr &([1000 * i32]* %9)[i64 59]; + i32* %305 = getelementptr &([1000 * i32]* %304)[i64 0][i32 %4]; + store i32* %305 with i32 %303; + [1000 * i32]* %306 = getelementptr &([1000 * i32]* %6)[i64 60]; + i32* %307 = getelementptr &([1000 * i32]* %306)[i64 0][i32 %4]; + i32 %308 = load i32* %307; + [1000 * i32]* %309 = getelementptr &([1000 * i32]* %9)[i64 60]; i32* %310 = getelementptr &([1000 * i32]* %309)[i64 0][i32 %4]; - i32 %311 = load i32* %310; - i32* %312 = getelementptr &(i32* %10)[i64 75]; - store i32* %312 with i32 %311; - [1000 * i32]* %313 = getelementptr &([1000 * i32]* %7)[i64 76]; - i32* %314 = getelementptr &([1000 * i32]* %313)[i64 0][i32 %4]; - i32 %315 = load i32* %314; - i32* %316 = getelementptr &(i32* %10)[i64 76]; - store i32* %316 with i32 %315; - [1000 * i32]* %317 = getelementptr &([1000 * i32]* %7)[i64 77]; - i32* %318 = getelementptr &([1000 * i32]* %317)[i64 0][i32 %4]; - i32 %319 = load i32* %318; - i32* %320 = getelementptr &(i32* %10)[i64 77]; - store i32* %320 with i32 %319; - [1000 * i32]* %321 = getelementptr &([1000 * i32]* %7)[i64 78]; + store i32* %310 with i32 %308; + [1000 * i32]* %311 = getelementptr &([1000 * i32]* %6)[i64 61]; + i32* %312 = getelementptr &([1000 * i32]* %311)[i64 0][i32 %4]; + i32 %313 = load i32* %312; + [1000 * i32]* %314 = getelementptr &([1000 * i32]* %9)[i64 61]; + i32* %315 = getelementptr &([1000 * i32]* %314)[i64 0][i32 %4]; + store i32* %315 with i32 %313; + [1000 * i32]* %316 = getelementptr &([1000 * i32]* %6)[i64 62]; + i32* %317 = getelementptr &([1000 * i32]* %316)[i64 0][i32 %4]; + i32 %318 = load i32* %317; + [1000 * i32]* %319 = getelementptr &([1000 * i32]* %9)[i64 62]; + i32* %320 = getelementptr &([1000 * i32]* %319)[i64 0][i32 %4]; + store i32* %320 with i32 %318; + [1000 * i32]* %321 = getelementptr &([1000 * i32]* %6)[i64 63]; i32* %322 = getelementptr &([1000 * i32]* %321)[i64 0][i32 %4]; i32 %323 = load i32* %322; - i32* %324 = getelementptr &(i32* %10)[i64 78]; - store i32* %324 with i32 %323; - [1000 * i32]* %325 = getelementptr &([1000 * i32]* %7)[i64 79]; - i32* %326 = getelementptr &([1000 * i32]* %325)[i64 0][i32 %4]; - i32 %327 = load i32* %326; - i32* %328 = getelementptr &(i32* %10)[i64 79]; - store i32* %328 with i32 %327; - [1000 * i32]* %329 = getelementptr &([1000 * i32]* %7)[i64 80]; - i32* %330 = getelementptr &([1000 * i32]* %329)[i64 0][i32 %4]; - i32 %331 = load i32* %330; - i32* %332 = getelementptr &(i32* %10)[i64 80]; - store i32* %332 with i32 %331; - [1000 * i32]* %333 = getelementptr &([1000 * i32]* %7)[i64 81]; + [1000 * i32]* %324 = getelementptr &([1000 * i32]* %9)[i64 63]; + i32* %325 = getelementptr &([1000 * i32]* %324)[i64 0][i32 %4]; + store i32* %325 with i32 %323; + i32 %326 = add i32 %5, i32 64; + i1 %327 = icmp slt i32 %326, i32 960; + cbr i1 %327(prob = 0.933333), ^while.body, ^while.body1; + ^while.body1: + [1000 * i32]* %328 = getelementptr &([1000 * i32]* %6)[i64 64]; + i32* %329 = getelementptr &([1000 * i32]* %328)[i64 0][i32 %4]; + i32 %330 = load i32* %329; + [1000 * i32]* %331 = getelementptr &([1000 * i32]* %9)[i64 64]; + i32* %332 = getelementptr &([1000 * i32]* %331)[i64 0][i32 %4]; + store i32* %332 with i32 %330; + [1000 * i32]* %333 = getelementptr &([1000 * i32]* %6)[i64 65]; i32* %334 = getelementptr &([1000 * i32]* %333)[i64 0][i32 %4]; i32 %335 = load i32* %334; - i32* %336 = getelementptr &(i32* %10)[i64 81]; - store i32* %336 with i32 %335; - [1000 * i32]* %337 = getelementptr &([1000 * i32]* %7)[i64 82]; - i32* %338 = getelementptr &([1000 * i32]* %337)[i64 0][i32 %4]; - i32 %339 = load i32* %338; - i32* %340 = getelementptr &(i32* %10)[i64 82]; - store i32* %340 with i32 %339; - [1000 * i32]* %341 = getelementptr &([1000 * i32]* %7)[i64 83]; + [1000 * i32]* %336 = getelementptr &([1000 * i32]* %9)[i64 65]; + i32* %337 = getelementptr &([1000 * i32]* %336)[i64 0][i32 %4]; + store i32* %337 with i32 %335; + [1000 * i32]* %338 = getelementptr &([1000 * i32]* %6)[i64 66]; + i32* %339 = getelementptr &([1000 * i32]* %338)[i64 0][i32 %4]; + i32 %340 = load i32* %339; + [1000 * i32]* %341 = getelementptr &([1000 * i32]* %9)[i64 66]; i32* %342 = getelementptr &([1000 * i32]* %341)[i64 0][i32 %4]; - i32 %343 = load i32* %342; - i32* %344 = getelementptr &(i32* %10)[i64 83]; - store i32* %344 with i32 %343; - [1000 * i32]* %345 = getelementptr &([1000 * i32]* %7)[i64 84]; - i32* %346 = getelementptr &([1000 * i32]* %345)[i64 0][i32 %4]; - i32 %347 = load i32* %346; - i32* %348 = getelementptr &(i32* %10)[i64 84]; - store i32* %348 with i32 %347; - [1000 * i32]* %349 = getelementptr &([1000 * i32]* %7)[i64 85]; - i32* %350 = getelementptr &([1000 * i32]* %349)[i64 0][i32 %4]; - i32 %351 = load i32* %350; - i32* %352 = getelementptr &(i32* %10)[i64 85]; - store i32* %352 with i32 %351; - [1000 * i32]* %353 = getelementptr &([1000 * i32]* %7)[i64 86]; + store i32* %342 with i32 %340; + [1000 * i32]* %343 = getelementptr &([1000 * i32]* %6)[i64 67]; + i32* %344 = getelementptr &([1000 * i32]* %343)[i64 0][i32 %4]; + i32 %345 = load i32* %344; + [1000 * i32]* %346 = getelementptr &([1000 * i32]* %9)[i64 67]; + i32* %347 = getelementptr &([1000 * i32]* %346)[i64 0][i32 %4]; + store i32* %347 with i32 %345; + [1000 * i32]* %348 = getelementptr &([1000 * i32]* %6)[i64 68]; + i32* %349 = getelementptr &([1000 * i32]* %348)[i64 0][i32 %4]; + i32 %350 = load i32* %349; + [1000 * i32]* %351 = getelementptr &([1000 * i32]* %9)[i64 68]; + i32* %352 = getelementptr &([1000 * i32]* %351)[i64 0][i32 %4]; + store i32* %352 with i32 %350; + [1000 * i32]* %353 = getelementptr &([1000 * i32]* %6)[i64 69]; i32* %354 = getelementptr &([1000 * i32]* %353)[i64 0][i32 %4]; i32 %355 = load i32* %354; - i32* %356 = getelementptr &(i32* %10)[i64 86]; - store i32* %356 with i32 %355; - [1000 * i32]* %357 = getelementptr &([1000 * i32]* %7)[i64 87]; - i32* %358 = getelementptr &([1000 * i32]* %357)[i64 0][i32 %4]; - i32 %359 = load i32* %358; - i32* %360 = getelementptr &(i32* %10)[i64 87]; - store i32* %360 with i32 %359; - [1000 * i32]* %361 = getelementptr &([1000 * i32]* %7)[i64 88]; + [1000 * i32]* %356 = getelementptr &([1000 * i32]* %9)[i64 69]; + i32* %357 = getelementptr &([1000 * i32]* %356)[i64 0][i32 %4]; + store i32* %357 with i32 %355; + [1000 * i32]* %358 = getelementptr &([1000 * i32]* %6)[i64 70]; + i32* %359 = getelementptr &([1000 * i32]* %358)[i64 0][i32 %4]; + i32 %360 = load i32* %359; + [1000 * i32]* %361 = getelementptr &([1000 * i32]* %9)[i64 70]; i32* %362 = getelementptr &([1000 * i32]* %361)[i64 0][i32 %4]; - i32 %363 = load i32* %362; - i32* %364 = getelementptr &(i32* %10)[i64 88]; - store i32* %364 with i32 %363; - [1000 * i32]* %365 = getelementptr &([1000 * i32]* %7)[i64 89]; - i32* %366 = getelementptr &([1000 * i32]* %365)[i64 0][i32 %4]; - i32 %367 = load i32* %366; - i32* %368 = getelementptr &(i32* %10)[i64 89]; - store i32* %368 with i32 %367; - [1000 * i32]* %369 = getelementptr &([1000 * i32]* %7)[i64 90]; - i32* %370 = getelementptr &([1000 * i32]* %369)[i64 0][i32 %4]; - i32 %371 = load i32* %370; - i32* %372 = getelementptr &(i32* %10)[i64 90]; - store i32* %372 with i32 %371; - [1000 * i32]* %373 = getelementptr &([1000 * i32]* %7)[i64 91]; + store i32* %362 with i32 %360; + [1000 * i32]* %363 = getelementptr &([1000 * i32]* %6)[i64 71]; + i32* %364 = getelementptr &([1000 * i32]* %363)[i64 0][i32 %4]; + i32 %365 = load i32* %364; + [1000 * i32]* %366 = getelementptr &([1000 * i32]* %9)[i64 71]; + i32* %367 = getelementptr &([1000 * i32]* %366)[i64 0][i32 %4]; + store i32* %367 with i32 %365; + [1000 * i32]* %368 = getelementptr &([1000 * i32]* %6)[i64 72]; + i32* %369 = getelementptr &([1000 * i32]* %368)[i64 0][i32 %4]; + i32 %370 = load i32* %369; + [1000 * i32]* %371 = getelementptr &([1000 * i32]* %9)[i64 72]; + i32* %372 = getelementptr &([1000 * i32]* %371)[i64 0][i32 %4]; + store i32* %372 with i32 %370; + [1000 * i32]* %373 = getelementptr &([1000 * i32]* %6)[i64 73]; i32* %374 = getelementptr &([1000 * i32]* %373)[i64 0][i32 %4]; i32 %375 = load i32* %374; - i32* %376 = getelementptr &(i32* %10)[i64 91]; - store i32* %376 with i32 %375; - [1000 * i32]* %377 = getelementptr &([1000 * i32]* %7)[i64 92]; - i32* %378 = getelementptr &([1000 * i32]* %377)[i64 0][i32 %4]; - i32 %379 = load i32* %378; - i32* %380 = getelementptr &(i32* %10)[i64 92]; - store i32* %380 with i32 %379; - [1000 * i32]* %381 = getelementptr &([1000 * i32]* %7)[i64 93]; + [1000 * i32]* %376 = getelementptr &([1000 * i32]* %9)[i64 73]; + i32* %377 = getelementptr &([1000 * i32]* %376)[i64 0][i32 %4]; + store i32* %377 with i32 %375; + [1000 * i32]* %378 = getelementptr &([1000 * i32]* %6)[i64 74]; + i32* %379 = getelementptr &([1000 * i32]* %378)[i64 0][i32 %4]; + i32 %380 = load i32* %379; + [1000 * i32]* %381 = getelementptr &([1000 * i32]* %9)[i64 74]; i32* %382 = getelementptr &([1000 * i32]* %381)[i64 0][i32 %4]; - i32 %383 = load i32* %382; - i32* %384 = getelementptr &(i32* %10)[i64 93]; - store i32* %384 with i32 %383; - [1000 * i32]* %385 = getelementptr &([1000 * i32]* %7)[i64 94]; - i32* %386 = getelementptr &([1000 * i32]* %385)[i64 0][i32 %4]; - i32 %387 = load i32* %386; - i32* %388 = getelementptr &(i32* %10)[i64 94]; - store i32* %388 with i32 %387; - [1000 * i32]* %389 = getelementptr &([1000 * i32]* %7)[i64 95]; - i32* %390 = getelementptr &([1000 * i32]* %389)[i64 0][i32 %4]; - i32 %391 = load i32* %390; - i32* %392 = getelementptr &(i32* %10)[i64 95]; - store i32* %392 with i32 %391; - [1000 * i32]* %393 = getelementptr &([1000 * i32]* %7)[i64 96]; + store i32* %382 with i32 %380; + [1000 * i32]* %383 = getelementptr &([1000 * i32]* %6)[i64 75]; + i32* %384 = getelementptr &([1000 * i32]* %383)[i64 0][i32 %4]; + i32 %385 = load i32* %384; + [1000 * i32]* %386 = getelementptr &([1000 * i32]* %9)[i64 75]; + i32* %387 = getelementptr &([1000 * i32]* %386)[i64 0][i32 %4]; + store i32* %387 with i32 %385; + [1000 * i32]* %388 = getelementptr &([1000 * i32]* %6)[i64 76]; + i32* %389 = getelementptr &([1000 * i32]* %388)[i64 0][i32 %4]; + i32 %390 = load i32* %389; + [1000 * i32]* %391 = getelementptr &([1000 * i32]* %9)[i64 76]; + i32* %392 = getelementptr &([1000 * i32]* %391)[i64 0][i32 %4]; + store i32* %392 with i32 %390; + [1000 * i32]* %393 = getelementptr &([1000 * i32]* %6)[i64 77]; i32* %394 = getelementptr &([1000 * i32]* %393)[i64 0][i32 %4]; i32 %395 = load i32* %394; - i32* %396 = getelementptr &(i32* %10)[i64 96]; - store i32* %396 with i32 %395; - [1000 * i32]* %397 = getelementptr &([1000 * i32]* %7)[i64 97]; - i32* %398 = getelementptr &([1000 * i32]* %397)[i64 0][i32 %4]; - i32 %399 = load i32* %398; - i32* %400 = getelementptr &(i32* %10)[i64 97]; - store i32* %400 with i32 %399; - [1000 * i32]* %401 = getelementptr &([1000 * i32]* %7)[i64 98]; + [1000 * i32]* %396 = getelementptr &([1000 * i32]* %9)[i64 77]; + i32* %397 = getelementptr &([1000 * i32]* %396)[i64 0][i32 %4]; + store i32* %397 with i32 %395; + [1000 * i32]* %398 = getelementptr &([1000 * i32]* %6)[i64 78]; + i32* %399 = getelementptr &([1000 * i32]* %398)[i64 0][i32 %4]; + i32 %400 = load i32* %399; + [1000 * i32]* %401 = getelementptr &([1000 * i32]* %9)[i64 78]; i32* %402 = getelementptr &([1000 * i32]* %401)[i64 0][i32 %4]; - i32 %403 = load i32* %402; - i32* %404 = getelementptr &(i32* %10)[i64 98]; - store i32* %404 with i32 %403; - [1000 * i32]* %405 = getelementptr &([1000 * i32]* %7)[i64 99]; - i32* %406 = getelementptr &([1000 * i32]* %405)[i64 0][i32 %4]; - i32 %407 = load i32* %406; - i32* %408 = getelementptr &(i32* %10)[i64 99]; - store i32* %408 with i32 %407; - [1000 * i32]* %409 = getelementptr &([1000 * i32]* %7)[i64 100]; - i32* %410 = getelementptr &([1000 * i32]* %409)[i64 0][i32 %4]; - i32 %411 = load i32* %410; - i32* %412 = getelementptr &(i32* %10)[i64 100]; - store i32* %412 with i32 %411; - [1000 * i32]* %413 = getelementptr &([1000 * i32]* %7)[i64 101]; + store i32* %402 with i32 %400; + [1000 * i32]* %403 = getelementptr &([1000 * i32]* %6)[i64 79]; + i32* %404 = getelementptr &([1000 * i32]* %403)[i64 0][i32 %4]; + i32 %405 = load i32* %404; + [1000 * i32]* %406 = getelementptr &([1000 * i32]* %9)[i64 79]; + i32* %407 = getelementptr &([1000 * i32]* %406)[i64 0][i32 %4]; + store i32* %407 with i32 %405; + [1000 * i32]* %408 = getelementptr &([1000 * i32]* %6)[i64 80]; + i32* %409 = getelementptr &([1000 * i32]* %408)[i64 0][i32 %4]; + i32 %410 = load i32* %409; + [1000 * i32]* %411 = getelementptr &([1000 * i32]* %9)[i64 80]; + i32* %412 = getelementptr &([1000 * i32]* %411)[i64 0][i32 %4]; + store i32* %412 with i32 %410; + [1000 * i32]* %413 = getelementptr &([1000 * i32]* %6)[i64 81]; i32* %414 = getelementptr &([1000 * i32]* %413)[i64 0][i32 %4]; i32 %415 = load i32* %414; - i32* %416 = getelementptr &(i32* %10)[i64 101]; - store i32* %416 with i32 %415; - [1000 * i32]* %417 = getelementptr &([1000 * i32]* %7)[i64 102]; - i32* %418 = getelementptr &([1000 * i32]* %417)[i64 0][i32 %4]; - i32 %419 = load i32* %418; - i32* %420 = getelementptr &(i32* %10)[i64 102]; - store i32* %420 with i32 %419; - [1000 * i32]* %421 = getelementptr &([1000 * i32]* %7)[i64 103]; + [1000 * i32]* %416 = getelementptr &([1000 * i32]* %9)[i64 81]; + i32* %417 = getelementptr &([1000 * i32]* %416)[i64 0][i32 %4]; + store i32* %417 with i32 %415; + [1000 * i32]* %418 = getelementptr &([1000 * i32]* %6)[i64 82]; + i32* %419 = getelementptr &([1000 * i32]* %418)[i64 0][i32 %4]; + i32 %420 = load i32* %419; + [1000 * i32]* %421 = getelementptr &([1000 * i32]* %9)[i64 82]; i32* %422 = getelementptr &([1000 * i32]* %421)[i64 0][i32 %4]; - i32 %423 = load i32* %422; - i32* %424 = getelementptr &(i32* %10)[i64 103]; - store i32* %424 with i32 %423; - i32 %425 = add i32 %4, i32 1; - i1 %426 = icmp sgt i32 %1, i32 %425; - cbr i1 %426(prob = 0.984615), ^b1, ^b2; + store i32* %422 with i32 %420; + [1000 * i32]* %423 = getelementptr &([1000 * i32]* %6)[i64 83]; + i32* %424 = getelementptr &([1000 * i32]* %423)[i64 0][i32 %4]; + i32 %425 = load i32* %424; + [1000 * i32]* %426 = getelementptr &([1000 * i32]* %9)[i64 83]; + i32* %427 = getelementptr &([1000 * i32]* %426)[i64 0][i32 %4]; + store i32* %427 with i32 %425; + [1000 * i32]* %428 = getelementptr &([1000 * i32]* %6)[i64 84]; + i32* %429 = getelementptr &([1000 * i32]* %428)[i64 0][i32 %4]; + i32 %430 = load i32* %429; + [1000 * i32]* %431 = getelementptr &([1000 * i32]* %9)[i64 84]; + i32* %432 = getelementptr &([1000 * i32]* %431)[i64 0][i32 %4]; + store i32* %432 with i32 %430; + [1000 * i32]* %433 = getelementptr &([1000 * i32]* %6)[i64 85]; + i32* %434 = getelementptr &([1000 * i32]* %433)[i64 0][i32 %4]; + i32 %435 = load i32* %434; + [1000 * i32]* %436 = getelementptr &([1000 * i32]* %9)[i64 85]; + i32* %437 = getelementptr &([1000 * i32]* %436)[i64 0][i32 %4]; + store i32* %437 with i32 %435; + [1000 * i32]* %438 = getelementptr &([1000 * i32]* %6)[i64 86]; + i32* %439 = getelementptr &([1000 * i32]* %438)[i64 0][i32 %4]; + i32 %440 = load i32* %439; + [1000 * i32]* %441 = getelementptr &([1000 * i32]* %9)[i64 86]; + i32* %442 = getelementptr &([1000 * i32]* %441)[i64 0][i32 %4]; + store i32* %442 with i32 %440; + [1000 * i32]* %443 = getelementptr &([1000 * i32]* %6)[i64 87]; + i32* %444 = getelementptr &([1000 * i32]* %443)[i64 0][i32 %4]; + i32 %445 = load i32* %444; + [1000 * i32]* %446 = getelementptr &([1000 * i32]* %9)[i64 87]; + i32* %447 = getelementptr &([1000 * i32]* %446)[i64 0][i32 %4]; + store i32* %447 with i32 %445; + [1000 * i32]* %448 = getelementptr &([1000 * i32]* %6)[i64 88]; + i32* %449 = getelementptr &([1000 * i32]* %448)[i64 0][i32 %4]; + i32 %450 = load i32* %449; + [1000 * i32]* %451 = getelementptr &([1000 * i32]* %9)[i64 88]; + i32* %452 = getelementptr &([1000 * i32]* %451)[i64 0][i32 %4]; + store i32* %452 with i32 %450; + [1000 * i32]* %453 = getelementptr &([1000 * i32]* %6)[i64 89]; + i32* %454 = getelementptr &([1000 * i32]* %453)[i64 0][i32 %4]; + i32 %455 = load i32* %454; + [1000 * i32]* %456 = getelementptr &([1000 * i32]* %9)[i64 89]; + i32* %457 = getelementptr &([1000 * i32]* %456)[i64 0][i32 %4]; + store i32* %457 with i32 %455; + [1000 * i32]* %458 = getelementptr &([1000 * i32]* %6)[i64 90]; + i32* %459 = getelementptr &([1000 * i32]* %458)[i64 0][i32 %4]; + i32 %460 = load i32* %459; + [1000 * i32]* %461 = getelementptr &([1000 * i32]* %9)[i64 90]; + i32* %462 = getelementptr &([1000 * i32]* %461)[i64 0][i32 %4]; + store i32* %462 with i32 %460; + [1000 * i32]* %463 = getelementptr &([1000 * i32]* %6)[i64 91]; + i32* %464 = getelementptr &([1000 * i32]* %463)[i64 0][i32 %4]; + i32 %465 = load i32* %464; + [1000 * i32]* %466 = getelementptr &([1000 * i32]* %9)[i64 91]; + i32* %467 = getelementptr &([1000 * i32]* %466)[i64 0][i32 %4]; + store i32* %467 with i32 %465; + [1000 * i32]* %468 = getelementptr &([1000 * i32]* %6)[i64 92]; + i32* %469 = getelementptr &([1000 * i32]* %468)[i64 0][i32 %4]; + i32 %470 = load i32* %469; + [1000 * i32]* %471 = getelementptr &([1000 * i32]* %9)[i64 92]; + i32* %472 = getelementptr &([1000 * i32]* %471)[i64 0][i32 %4]; + store i32* %472 with i32 %470; + [1000 * i32]* %473 = getelementptr &([1000 * i32]* %6)[i64 93]; + i32* %474 = getelementptr &([1000 * i32]* %473)[i64 0][i32 %4]; + i32 %475 = load i32* %474; + [1000 * i32]* %476 = getelementptr &([1000 * i32]* %9)[i64 93]; + i32* %477 = getelementptr &([1000 * i32]* %476)[i64 0][i32 %4]; + store i32* %477 with i32 %475; + [1000 * i32]* %478 = getelementptr &([1000 * i32]* %6)[i64 94]; + i32* %479 = getelementptr &([1000 * i32]* %478)[i64 0][i32 %4]; + i32 %480 = load i32* %479; + [1000 * i32]* %481 = getelementptr &([1000 * i32]* %9)[i64 94]; + i32* %482 = getelementptr &([1000 * i32]* %481)[i64 0][i32 %4]; + store i32* %482 with i32 %480; + [1000 * i32]* %483 = getelementptr &([1000 * i32]* %6)[i64 95]; + i32* %484 = getelementptr &([1000 * i32]* %483)[i64 0][i32 %4]; + i32 %485 = load i32* %484; + [1000 * i32]* %486 = getelementptr &([1000 * i32]* %9)[i64 95]; + i32* %487 = getelementptr &([1000 * i32]* %486)[i64 0][i32 %4]; + store i32* %487 with i32 %485; + [1000 * i32]* %488 = getelementptr &([1000 * i32]* %6)[i64 96]; + i32* %489 = getelementptr &([1000 * i32]* %488)[i64 0][i32 %4]; + i32 %490 = load i32* %489; + [1000 * i32]* %491 = getelementptr &([1000 * i32]* %9)[i64 96]; + i32* %492 = getelementptr &([1000 * i32]* %491)[i64 0][i32 %4]; + store i32* %492 with i32 %490; + [1000 * i32]* %493 = getelementptr &([1000 * i32]* %6)[i64 97]; + i32* %494 = getelementptr &([1000 * i32]* %493)[i64 0][i32 %4]; + i32 %495 = load i32* %494; + [1000 * i32]* %496 = getelementptr &([1000 * i32]* %9)[i64 97]; + i32* %497 = getelementptr &([1000 * i32]* %496)[i64 0][i32 %4]; + store i32* %497 with i32 %495; + [1000 * i32]* %498 = getelementptr &([1000 * i32]* %6)[i64 98]; + i32* %499 = getelementptr &([1000 * i32]* %498)[i64 0][i32 %4]; + i32 %500 = load i32* %499; + [1000 * i32]* %501 = getelementptr &([1000 * i32]* %9)[i64 98]; + i32* %502 = getelementptr &([1000 * i32]* %501)[i64 0][i32 %4]; + store i32* %502 with i32 %500; + [1000 * i32]* %503 = getelementptr &([1000 * i32]* %6)[i64 99]; + i32* %504 = getelementptr &([1000 * i32]* %503)[i64 0][i32 %4]; + i32 %505 = load i32* %504; + [1000 * i32]* %506 = getelementptr &([1000 * i32]* %9)[i64 99]; + i32* %507 = getelementptr &([1000 * i32]* %506)[i64 0][i32 %4]; + store i32* %507 with i32 %505; + [1000 * i32]* %508 = getelementptr &([1000 * i32]* %6)[i64 100]; + i32* %509 = getelementptr &([1000 * i32]* %508)[i64 0][i32 %4]; + i32 %510 = load i32* %509; + [1000 * i32]* %511 = getelementptr &([1000 * i32]* %9)[i64 100]; + i32* %512 = getelementptr &([1000 * i32]* %511)[i64 0][i32 %4]; + store i32* %512 with i32 %510; + [1000 * i32]* %513 = getelementptr &([1000 * i32]* %6)[i64 101]; + i32* %514 = getelementptr &([1000 * i32]* %513)[i64 0][i32 %4]; + i32 %515 = load i32* %514; + [1000 * i32]* %516 = getelementptr &([1000 * i32]* %9)[i64 101]; + i32* %517 = getelementptr &([1000 * i32]* %516)[i64 0][i32 %4]; + store i32* %517 with i32 %515; + [1000 * i32]* %518 = getelementptr &([1000 * i32]* %6)[i64 102]; + i32* %519 = getelementptr &([1000 * i32]* %518)[i64 0][i32 %4]; + i32 %520 = load i32* %519; + [1000 * i32]* %521 = getelementptr &([1000 * i32]* %9)[i64 102]; + i32* %522 = getelementptr &([1000 * i32]* %521)[i64 0][i32 %4]; + store i32* %522 with i32 %520; + [1000 * i32]* %523 = getelementptr &([1000 * i32]* %6)[i64 103]; + i32* %524 = getelementptr &([1000 * i32]* %523)[i64 0][i32 %4]; + i32 %525 = load i32* %524; + [1000 * i32]* %526 = getelementptr &([1000 * i32]* %9)[i64 103]; + i32* %527 = getelementptr &([1000 * i32]* %526)[i64 0][i32 %4]; + store i32* %527 with i32 %525; + i32 %528 = add i32 %4, i32 1; + i1 %529 = icmp sgt i32 %1, i32 %528; + cbr i1 %529(prob = 0.984615), ^b1, ^b2; ^b2: ret; } internal func @cmmc_parallel_body_1(i32 %0, i32 %1) -> void { NoRecurse ParallelBody AlignedParallelBody } { ^b: - [1000 * [1000 * i32]]* %2 = ptrcast [1000 * [1000 * i32]]* @b to [1000 * [1000 * i32]]*; - [1000 * [1000 * i32]]* %3 = ptrcast [1000 * [1000 * i32]]* @c to [1000 * [1000 * i32]]*; - [1000 * [1000 * i32]]* %4 = ptrcast [1000 * [1000 * i32]]* @a to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %2 = ptrcast [1000 * [1000 * i32]]* @c to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %3 = ptrcast [1000 * [1000 * i32]]* @a to [1000 * [1000 * i32]]*; + [1000 * [1000 * i32]]* %4 = ptrcast [1000 * [1000 * i32]]* @b to [1000 * [1000 * i32]]*; ubr ^b1; ^b1: - i32 %5 = phi [^b, i32 %0] [^b2, i32 %184]; - [1000 * i32]* %6 = getelementptr &([1000 * [1000 * i32]]* %3)[i64 0][i32 %5]; - [1000 * i32]* %7 = getelementptr &([1000 * [1000 * i32]]* %4)[i64 0][i32 %5]; + i32 %5 = phi [^b, i32 %0] [^b2, i32 %161]; + [1000 * i32]* %6 = getelementptr &([1000 * [1000 * i32]]* %2)[i64 0][i32 %5]; + [1000 * i32]* %7 = getelementptr &([1000 * [1000 * i32]]* %3)[i64 0][i32 %5]; ubr ^while.body; ^while.body: - i32 %8 = phi [^b1, i32 0] [^while.body2, i32 %182]; + i32 %8 = phi [^b1, i32 0] [^while.body2, i32 %159]; + [1000 * i32]* %9 = getelementptr &([1000 * [1000 * i32]]* %4)[i64 0][i32 %8]; ubr ^while.body1; ^while.body1: - i32 %9 = phi [^while.body, i32 0] [^while.body1, i32 %123]; - i32 %10 = phi [^while.body, i32 0] [^while.body1, i32 %122]; - [1000 * i32]* %11 = getelementptr &([1000 * [1000 * i32]]* %2)[i64 0][i32 %9]; - i32* %12 = getelementptr &([1000 * i32]* %11)[i64 0][i32 %8]; - i32* %13 = getelementptr &([1000 * i32]* %7)[i64 0][i32 %9]; - i32 %14 = load i32* %13; - i32 %15 = load i32* %12; - [1000 * i32]* %16 = getelementptr &([1000 * i32]* %11)[i64 1]; - i32* %17 = getelementptr &([1000 * i32]* %16)[i64 0][i32 %8]; - i32* %18 = getelementptr &(i32* %13)[i64 1]; + i32 %10 = phi [^while.body, i32 0] [^while.body1, i32 %108]; + i32 %11 = phi [^while.body, i32 0] [^while.body1, i32 %107]; + i32* %12 = getelementptr &([1000 * i32]* %7)[i64 0][i32 %10]; + i32 %13 = load i32* %12; + i32* %14 = getelementptr &([1000 * i32]* %9)[i64 0][i32 %10]; + i32 %15 = load i32* %14; + i32* %16 = getelementptr &(i32* %12)[i64 1]; + i32 %17 = load i32* %16; + i32* %18 = getelementptr &(i32* %14)[i64 1]; i32 %19 = load i32* %18; - i32 %20 = load i32* %17; - i32 %21 = mul i32 %19, i32 %20; - i32 %22 = mul i32 %14, i32 %15; - i32 %23 = add i32 %21, i32 %22; - [1000 * i32]* %24 = getelementptr &([1000 * i32]* %11)[i64 2]; - i32* %25 = getelementptr &([1000 * i32]* %24)[i64 0][i32 %8]; - i32* %26 = getelementptr &(i32* %13)[i64 2]; - i32 %27 = load i32* %26; - i32 %28 = load i32* %25; - i32 %29 = mul i32 %27, i32 %28; - i32 %30 = add i32 %23, i32 %29; - [1000 * i32]* %31 = getelementptr &([1000 * i32]* %11)[i64 3]; - i32* %32 = getelementptr &([1000 * i32]* %31)[i64 0][i32 %8]; - i32* %33 = getelementptr &(i32* %13)[i64 3]; - i32 %34 = load i32* %33; - i32 %35 = load i32* %32; - i32 %36 = mul i32 %34, i32 %35; - i32 %37 = add i32 %30, i32 %36; - [1000 * i32]* %38 = getelementptr &([1000 * i32]* %11)[i64 4]; - i32* %39 = getelementptr &([1000 * i32]* %38)[i64 0][i32 %8]; - i32* %40 = getelementptr &(i32* %13)[i64 4]; - i32 %41 = load i32* %40; - i32 %42 = load i32* %39; - i32 %43 = mul i32 %41, i32 %42; - i32 %44 = add i32 %37, i32 %43; - [1000 * i32]* %45 = getelementptr &([1000 * i32]* %11)[i64 5]; - i32* %46 = getelementptr &([1000 * i32]* %45)[i64 0][i32 %8]; - i32* %47 = getelementptr &(i32* %13)[i64 5]; + i32 %20 = mul i32 %17, i32 %19; + i32 %21 = mul i32 %13, i32 %15; + i32 %22 = add i32 %20, i32 %21; + i32* %23 = getelementptr &(i32* %12)[i64 2]; + i32 %24 = load i32* %23; + i32* %25 = getelementptr &(i32* %14)[i64 2]; + i32 %26 = load i32* %25; + i32 %27 = mul i32 %24, i32 %26; + i32 %28 = add i32 %22, i32 %27; + i32* %29 = getelementptr &(i32* %12)[i64 3]; + i32 %30 = load i32* %29; + i32* %31 = getelementptr &(i32* %14)[i64 3]; + i32 %32 = load i32* %31; + i32 %33 = mul i32 %30, i32 %32; + i32 %34 = add i32 %28, i32 %33; + i32* %35 = getelementptr &(i32* %12)[i64 4]; + i32 %36 = load i32* %35; + i32* %37 = getelementptr &(i32* %14)[i64 4]; + i32 %38 = load i32* %37; + i32 %39 = mul i32 %36, i32 %38; + i32 %40 = add i32 %34, i32 %39; + i32* %41 = getelementptr &(i32* %12)[i64 5]; + i32 %42 = load i32* %41; + i32* %43 = getelementptr &(i32* %14)[i64 5]; + i32 %44 = load i32* %43; + i32 %45 = mul i32 %42, i32 %44; + i32 %46 = add i32 %40, i32 %45; + i32* %47 = getelementptr &(i32* %12)[i64 6]; i32 %48 = load i32* %47; - i32 %49 = load i32* %46; - i32 %50 = mul i32 %48, i32 %49; - i32 %51 = add i32 %44, i32 %50; - [1000 * i32]* %52 = getelementptr &([1000 * i32]* %11)[i64 6]; - i32* %53 = getelementptr &([1000 * i32]* %52)[i64 0][i32 %8]; - i32* %54 = getelementptr &(i32* %13)[i64 6]; - i32 %55 = load i32* %54; - i32 %56 = load i32* %53; - i32 %57 = mul i32 %55, i32 %56; - i32 %58 = add i32 %51, i32 %57; - [1000 * i32]* %59 = getelementptr &([1000 * i32]* %11)[i64 7]; - i32* %60 = getelementptr &([1000 * i32]* %59)[i64 0][i32 %8]; - i32* %61 = getelementptr &(i32* %13)[i64 7]; + i32* %49 = getelementptr &(i32* %14)[i64 6]; + i32 %50 = load i32* %49; + i32 %51 = mul i32 %48, i32 %50; + i32 %52 = add i32 %46, i32 %51; + i32* %53 = getelementptr &(i32* %12)[i64 7]; + i32 %54 = load i32* %53; + i32* %55 = getelementptr &(i32* %14)[i64 7]; + i32 %56 = load i32* %55; + i32 %57 = mul i32 %54, i32 %56; + i32 %58 = add i32 %52, i32 %57; + i32* %59 = getelementptr &(i32* %12)[i64 8]; + i32 %60 = load i32* %59; + i32* %61 = getelementptr &(i32* %14)[i64 8]; i32 %62 = load i32* %61; - i32 %63 = load i32* %60; - i32 %64 = mul i32 %62, i32 %63; - i32 %65 = add i32 %58, i32 %64; - [1000 * i32]* %66 = getelementptr &([1000 * i32]* %11)[i64 8]; - i32* %67 = getelementptr &([1000 * i32]* %66)[i64 0][i32 %8]; - i32* %68 = getelementptr &(i32* %13)[i64 8]; - i32 %69 = load i32* %68; - i32 %70 = load i32* %67; - i32 %71 = mul i32 %69, i32 %70; - i32 %72 = add i32 %65, i32 %71; - [1000 * i32]* %73 = getelementptr &([1000 * i32]* %11)[i64 9]; - i32* %74 = getelementptr &([1000 * i32]* %73)[i64 0][i32 %8]; - i32* %75 = getelementptr &(i32* %13)[i64 9]; - i32 %76 = load i32* %75; - i32 %77 = load i32* %74; - i32 %78 = mul i32 %76, i32 %77; - i32 %79 = add i32 %72, i32 %78; - [1000 * i32]* %80 = getelementptr &([1000 * i32]* %11)[i64 10]; - i32* %81 = getelementptr &([1000 * i32]* %80)[i64 0][i32 %8]; - i32* %82 = getelementptr &(i32* %13)[i64 10]; - i32 %83 = load i32* %82; - i32 %84 = load i32* %81; - i32 %85 = mul i32 %83, i32 %84; - i32 %86 = add i32 %79, i32 %85; - [1000 * i32]* %87 = getelementptr &([1000 * i32]* %11)[i64 11]; - i32* %88 = getelementptr &([1000 * i32]* %87)[i64 0][i32 %8]; - i32* %89 = getelementptr &(i32* %13)[i64 11]; + i32 %63 = mul i32 %60, i32 %62; + i32 %64 = add i32 %58, i32 %63; + i32* %65 = getelementptr &(i32* %12)[i64 9]; + i32 %66 = load i32* %65; + i32* %67 = getelementptr &(i32* %14)[i64 9]; + i32 %68 = load i32* %67; + i32 %69 = mul i32 %66, i32 %68; + i32 %70 = add i32 %64, i32 %69; + i32* %71 = getelementptr &(i32* %12)[i64 10]; + i32 %72 = load i32* %71; + i32* %73 = getelementptr &(i32* %14)[i64 10]; + i32 %74 = load i32* %73; + i32 %75 = mul i32 %72, i32 %74; + i32 %76 = add i32 %70, i32 %75; + i32* %77 = getelementptr &(i32* %12)[i64 11]; + i32 %78 = load i32* %77; + i32* %79 = getelementptr &(i32* %14)[i64 11]; + i32 %80 = load i32* %79; + i32 %81 = mul i32 %78, i32 %80; + i32 %82 = add i32 %76, i32 %81; + i32* %83 = getelementptr &(i32* %12)[i64 12]; + i32 %84 = load i32* %83; + i32* %85 = getelementptr &(i32* %14)[i64 12]; + i32 %86 = load i32* %85; + i32 %87 = mul i32 %84, i32 %86; + i32 %88 = add i32 %82, i32 %87; + i32* %89 = getelementptr &(i32* %12)[i64 13]; i32 %90 = load i32* %89; - i32 %91 = load i32* %88; - i32 %92 = mul i32 %90, i32 %91; - i32 %93 = add i32 %86, i32 %92; - [1000 * i32]* %94 = getelementptr &([1000 * i32]* %11)[i64 12]; - i32* %95 = getelementptr &([1000 * i32]* %94)[i64 0][i32 %8]; - i32* %96 = getelementptr &(i32* %13)[i64 12]; - i32 %97 = load i32* %96; - i32 %98 = load i32* %95; - i32 %99 = mul i32 %97, i32 %98; - i32 %100 = add i32 %93, i32 %99; - [1000 * i32]* %101 = getelementptr &([1000 * i32]* %11)[i64 13]; - i32* %102 = getelementptr &([1000 * i32]* %101)[i64 0][i32 %8]; - i32* %103 = getelementptr &(i32* %13)[i64 13]; + i32* %91 = getelementptr &(i32* %14)[i64 13]; + i32 %92 = load i32* %91; + i32 %93 = mul i32 %90, i32 %92; + i32 %94 = add i32 %88, i32 %93; + i32* %95 = getelementptr &(i32* %12)[i64 14]; + i32 %96 = load i32* %95; + i32* %97 = getelementptr &(i32* %14)[i64 14]; + i32 %98 = load i32* %97; + i32 %99 = mul i32 %96, i32 %98; + i32 %100 = add i32 %94, i32 %99; + i32* %101 = getelementptr &(i32* %12)[i64 15]; + i32 %102 = load i32* %101; + i32* %103 = getelementptr &(i32* %14)[i64 15]; i32 %104 = load i32* %103; - i32 %105 = load i32* %102; - i32 %106 = mul i32 %104, i32 %105; - i32 %107 = add i32 %100, i32 %106; - [1000 * i32]* %108 = getelementptr &([1000 * i32]* %11)[i64 14]; - i32* %109 = getelementptr &([1000 * i32]* %108)[i64 0][i32 %8]; - i32* %110 = getelementptr &(i32* %13)[i64 14]; - i32 %111 = load i32* %110; - i32 %112 = load i32* %109; - i32 %113 = mul i32 %111, i32 %112; - i32 %114 = add i32 %107, i32 %113; - [1000 * i32]* %115 = getelementptr &([1000 * i32]* %11)[i64 15]; - i32* %116 = getelementptr &([1000 * i32]* %115)[i64 0][i32 %8]; - i32* %117 = getelementptr &(i32* %13)[i64 15]; - i32 %118 = load i32* %117; - i32 %119 = load i32* %116; - i32 %120 = mul i32 %118, i32 %119; - i32 %121 = add i32 %114, i32 %120; - i32 %122 = add i32 %10, i32 %121; - i32 %123 = add i32 %9, i32 16; - i1 %124 = icmp slt i32 %123, i32 992; - cbr i1 %124(prob = 0.983871), ^while.body1, ^while.body2; + i32 %105 = mul i32 %102, i32 %104; + i32 %106 = add i32 %100, i32 %105; + i32 %107 = add i32 %11, i32 %106; + i32 %108 = add i32 %10, i32 16; + i1 %109 = icmp slt i32 %108, i32 992; + cbr i1 %109(prob = 0.983871), ^while.body1, ^while.body2; ^while.body2: - [1000 * i32]* %125 = getelementptr &([1000 * i32]* %11)[i64 16]; - i32* %126 = getelementptr &([1000 * i32]* %125)[i64 0][i32 %8]; - i32* %127 = getelementptr &(i32* %13)[i64 16]; - i32 %128 = load i32* %127; - i32 %129 = load i32* %126; - i32 %130 = mul i32 %128, i32 %129; - i32 %131 = add i32 %122, i32 %130; - [1000 * i32]* %132 = getelementptr &([1000 * i32]* %11)[i64 17]; - i32* %133 = getelementptr &([1000 * i32]* %132)[i64 0][i32 %8]; - i32* %134 = getelementptr &(i32* %13)[i64 17]; + i32* %110 = getelementptr &(i32* %12)[i64 16]; + i32 %111 = load i32* %110; + i32* %112 = getelementptr &(i32* %14)[i64 16]; + i32 %113 = load i32* %112; + i32 %114 = mul i32 %111, i32 %113; + i32 %115 = add i32 %107, i32 %114; + i32* %116 = getelementptr &(i32* %12)[i64 17]; + i32 %117 = load i32* %116; + i32* %118 = getelementptr &(i32* %14)[i64 17]; + i32 %119 = load i32* %118; + i32 %120 = mul i32 %117, i32 %119; + i32 %121 = add i32 %115, i32 %120; + i32* %122 = getelementptr &(i32* %12)[i64 18]; + i32 %123 = load i32* %122; + i32* %124 = getelementptr &(i32* %14)[i64 18]; + i32 %125 = load i32* %124; + i32 %126 = mul i32 %123, i32 %125; + i32 %127 = add i32 %121, i32 %126; + i32* %128 = getelementptr &(i32* %12)[i64 19]; + i32 %129 = load i32* %128; + i32* %130 = getelementptr &(i32* %14)[i64 19]; + i32 %131 = load i32* %130; + i32 %132 = mul i32 %129, i32 %131; + i32 %133 = add i32 %127, i32 %132; + i32* %134 = getelementptr &(i32* %12)[i64 20]; i32 %135 = load i32* %134; - i32 %136 = load i32* %133; - i32 %137 = mul i32 %135, i32 %136; - i32 %138 = add i32 %131, i32 %137; - [1000 * i32]* %139 = getelementptr &([1000 * i32]* %11)[i64 18]; - i32* %140 = getelementptr &([1000 * i32]* %139)[i64 0][i32 %8]; - i32* %141 = getelementptr &(i32* %13)[i64 18]; - i32 %142 = load i32* %141; - i32 %143 = load i32* %140; - i32 %144 = mul i32 %142, i32 %143; - i32 %145 = add i32 %138, i32 %144; - [1000 * i32]* %146 = getelementptr &([1000 * i32]* %11)[i64 19]; - i32* %147 = getelementptr &([1000 * i32]* %146)[i64 0][i32 %8]; - i32* %148 = getelementptr &(i32* %13)[i64 19]; + i32* %136 = getelementptr &(i32* %14)[i64 20]; + i32 %137 = load i32* %136; + i32 %138 = mul i32 %135, i32 %137; + i32 %139 = add i32 %133, i32 %138; + i32* %140 = getelementptr &(i32* %12)[i64 21]; + i32 %141 = load i32* %140; + i32* %142 = getelementptr &(i32* %14)[i64 21]; + i32 %143 = load i32* %142; + i32 %144 = mul i32 %141, i32 %143; + i32 %145 = add i32 %139, i32 %144; + i32* %146 = getelementptr &(i32* %12)[i64 22]; + i32 %147 = load i32* %146; + i32* %148 = getelementptr &(i32* %14)[i64 22]; i32 %149 = load i32* %148; - i32 %150 = load i32* %147; - i32 %151 = mul i32 %149, i32 %150; - i32 %152 = add i32 %145, i32 %151; - [1000 * i32]* %153 = getelementptr &([1000 * i32]* %11)[i64 20]; - i32* %154 = getelementptr &([1000 * i32]* %153)[i64 0][i32 %8]; - i32* %155 = getelementptr &(i32* %13)[i64 20]; - i32 %156 = load i32* %155; - i32 %157 = load i32* %154; - i32 %158 = mul i32 %156, i32 %157; - i32 %159 = add i32 %152, i32 %158; - [1000 * i32]* %160 = getelementptr &([1000 * i32]* %11)[i64 21]; - i32* %161 = getelementptr &([1000 * i32]* %160)[i64 0][i32 %8]; - i32* %162 = getelementptr &(i32* %13)[i64 21]; - i32 %163 = load i32* %162; - i32 %164 = load i32* %161; - i32 %165 = mul i32 %163, i32 %164; - i32 %166 = add i32 %159, i32 %165; - [1000 * i32]* %167 = getelementptr &([1000 * i32]* %11)[i64 22]; - i32* %168 = getelementptr &([1000 * i32]* %167)[i64 0][i32 %8]; - i32* %169 = getelementptr &(i32* %13)[i64 22]; - i32 %170 = load i32* %169; - i32 %171 = load i32* %168; - i32 %172 = mul i32 %170, i32 %171; - i32 %173 = add i32 %166, i32 %172; - [1000 * i32]* %174 = getelementptr &([1000 * i32]* %11)[i64 23]; - i32* %175 = getelementptr &([1000 * i32]* %174)[i64 0][i32 %8]; - i32* %176 = getelementptr &(i32* %13)[i64 23]; - i32 %177 = load i32* %176; - i32 %178 = load i32* %175; - i32 %179 = mul i32 %177, i32 %178; - i32 %180 = add i32 %173, i32 %179; - i32* %181 = getelementptr &([1000 * i32]* %6)[i64 0][i32 %8]; - store i32* %181 with i32 %180; - i32 %182 = add i32 %8, i32 1; - i1 %183 = icmp slt i32 %182, i32 1000; - cbr i1 %183(prob = 0.999), ^while.body, ^b2; + i32 %150 = mul i32 %147, i32 %149; + i32 %151 = add i32 %145, i32 %150; + i32* %152 = getelementptr &(i32* %12)[i64 23]; + i32 %153 = load i32* %152; + i32* %154 = getelementptr &(i32* %14)[i64 23]; + i32 %155 = load i32* %154; + i32 %156 = mul i32 %153, i32 %155; + i32 %157 = add i32 %151, i32 %156; + i32* %158 = getelementptr &([1000 * i32]* %6)[i64 0][i32 %8]; + store i32* %158 with i32 %157; + i32 %159 = add i32 %8, i32 1; + i1 %160 = icmp slt i32 %159, i32 1000; + cbr i1 %160(prob = 0.999), ^while.body, ^b2; ^b2: - i32 %184 = add i32 %5, i32 1; - i1 %185 = icmp sgt i32 %1, i32 %184; - cbr i1 %185(prob = 0.984615), ^b1, ^b3; + i32 %161 = add i32 %5, i32 1; + i1 %162 = icmp sgt i32 %1, i32 %161; + cbr i1 %162(prob = 0.984615), ^b1, ^b3; ^b3: ret; } diff --git a/tests/SysY2022/performance/vector_mul1.riscv.s b/tests/SysY2022/performance/vector_mul1.riscv.s index 4fdceeca4..184ba6feb 100644 --- a/tests/SysY2022/performance/vector_mul1.riscv.s +++ b/tests/SysY2022/performance/vector_mul1.riscv.s @@ -29,30 +29,47 @@ main: sd s1, 16(sp) jal _sysy_starttime lui a3, 24 -pcrel1011: +pcrel1019: auipc a0, %pcrel_hi(cmmc_parallel_body_payload_0) -pcrel1012: +pcrel1020: auipc a1, %pcrel_hi(vectorA) - addi s0, a1, %pcrel_lo(pcrel1012) -pcrel1013: + addi s0, a1, %pcrel_lo(pcrel1020) +pcrel1021: auipc a1, %pcrel_hi(cmmc_parallel_body_0) - sd s0, %pcrel_lo(pcrel1011)(a0) - addi a2, a1, %pcrel_lo(pcrel1013) + sd s0, %pcrel_lo(pcrel1019)(a0) + addi a2, a1, %pcrel_lo(pcrel1021) mv a0, zero addiw a1, a3, 1696 jal cmmcParallelFor -pcrel1014: +pcrel1022: auipc a1, %pcrel_hi(Vectortm) mv a4, zero -pcrel1015: +pcrel1023: auipc a0, %pcrel_hi(vectorB) - addi a3, a1, %pcrel_lo(pcrel1014) - addi s1, a0, %pcrel_lo(pcrel1015) + addi a3, a1, %pcrel_lo(pcrel1022) + addi s1, a0, %pcrel_lo(pcrel1023) + j label111 +.p2align 2 +label201: + addiw a4, a4, 1 + li a0, 1000 + bge a4, a0, label202 +.p2align 2 label111: fmv.w.x f10, zero mv a2, a3 mv t0, zero mv a0, zero + j label113 +.p2align 2 +label118: + fsw f10, 0(a2) + lui t0, 24 + addiw a1, t0, 1696 + bge a5, a1, label121 + addi a2, a2, 4 + mv t0, a5 +.p2align 2 label113: addiw a5, t0, 1 lui t1, 24 @@ -61,48 +78,86 @@ label113: addiw a1, a0, 3 lui t2, 24 addiw t1, t2, 1696 - blt a1, t1, label233 - fmv.w.x f11, zero -label223: - lui t1, 24 - addiw a1, t1, 1696 - bge a0, a1, label757 + bge a1, t1, label752 sh2add a1, a0, s0 -label228: - addw t2, t0, a0 + addiw t1, t0, 2 + addiw t2, t0, 3 + addiw t3, t0, 4 +.p2align 2 +label234: + addw t6, t0, a0 addw t4, a5, a0 - mulw t3, t2, t4 - mv t1, t3 - bge t3, zero, label978 - addiw t1, t3, 1 -label978: - sraiw t2, t1, 1 - addiw a0, a0, 1 - flw f13, 0(a1) - addw t3, a5, t2 - lui t2, 24 - fcvt.s.w f11, t3 - addiw t1, t2, 1696 - fdiv.s f12, f13, f11 - fadd.s f10, f10, f12 - blt a0, t1, label232 - lui a1, 24 - addiw a0, a1, 1696 -label118: - fsw f10, 0(a2) - lui t0, 24 - addiw a1, t0, 1696 - bge a5, a1, label121 - addi a2, a2, 4 - mv t0, a5 - j label113 + mulw a6, t6, t4 + mv t5, a6 + bge a6, zero, label982 + addiw t5, a6, 1 +label982: + sraiw t6, t5, 1 + flw f12, 0(a1) + addw t5, t1, a0 + addw a6, a5, t6 + mulw t6, t4, t5 + fcvt.s.w f11, a6 + mv t4, t6 + fdiv.s f13, f12, f11 + fadd.s f10, f10, f13 + bge t6, zero, label984 + addiw t4, t6, 1 +label984: + sraiw a6, t4, 1 + flw f14, 4(a1) + addw t4, t2, a0 + addw t6, a5, a6 + fcvt.s.w f12, t6 + mulw t6, t5, t4 + fdiv.s f13, f14, f12 + mv t5, t6 + fadd.s f11, f10, f13 + bge t6, zero, label986 + addiw t5, t6, 1 +label986: + sraiw t6, t5, 1 + flw f13, 8(a1) + addw a6, a5, t6 + addw t6, t3, a0 + fcvt.s.w f12, a6 + mulw t5, t4, t6 + fdiv.s f14, f13, f12 + mv t4, t5 + fadd.s f10, f11, f14 + bge t5, zero, label988 + addiw t4, t5, 1 +label988: + sraiw t5, t4, 1 + addiw a0, a0, 4 + flw f12, 12(a1) + addw t6, a5, t5 + lui t5, 24 + fcvt.s.w f11, t6 + addiw t4, t5, 1693 + fdiv.s f13, f12, f11 + fadd.s f10, f10, f13 + bge a0, t4, label820 + addi a1, a1, 16 + j label234 +.p2align 2 label121: fmv.w.x f10, zero mv a2, s1 - mv t0, zero + mv a5, zero mv a0, zero + j label122 +.p2align 2 +label144: + fsw f10, 0(a2) + lui a5, 24 + addiw a1, a5, 1696 + bge t0, a1, label343 + addi a2, a2, 4 + mv a5, t0 +.p2align 2 label122: - addiw a5, t0, 1 + addiw t0, a5, 1 lui t1, 24 addiw a1, t1, 1696 bge a0, a1, label144 @@ -111,315 +166,146 @@ label122: addiw t1, t2, 1696 bge a1, t1, label267 sh2add a1, a0, a3 - addiw t1, t0, 2 - addiw t2, t0, 3 - addiw t3, t0, 4 - fmv.s f11, f10 + addiw t1, a5, 2 + addiw t2, a5, 3 + addiw t3, a5, 4 +.p2align 2 label139: - addw t5, t0, a0 addw t6, a5, a0 - mulw a6, t5, t6 - mv t4, a6 + addw t4, t0, a0 + mulw a6, t6, t4 + mv t5, a6 bge a6, zero, label927 - addiw t4, a6, 1 + addiw t5, a6, 1 label927: - sraiw t5, t4, 1 + sraiw t6, t5, 1 addiw a6, a0, 1 flw f13, 0(a1) - addw t4, t1, a0 - addw a7, t5, a6 - mulw a6, t6, t4 + addw t5, t1, a0 + addw a7, t6, a6 + mulw a6, t4, t5 fcvt.s.w f12, a7 - mv t5, a6 + mv t6, a6 fdiv.s f14, f13, f12 - fadd.s f10, f11, f14 + fadd.s f11, f10, f14 bge a6, zero, label929 - addiw t5, a6, 1 + addiw t6, a6, 1 label929: - sraiw t6, t5, 1 + sraiw t4, t6, 1 addiw a7, a0, 2 flw f14, 4(a1) - addw t5, t2, a0 - addw a6, t6, a7 - mulw t6, t4, t5 + addw a6, t4, a7 + addw t4, t2, a0 fcvt.s.w f12, a6 - mv t4, t6 + mulw t6, t5, t4 fdiv.s f13, f14, f12 - fadd.s f11, f10, f13 + mv t5, t6 + fadd.s f10, f11, f13 bge t6, zero, label931 - addiw t4, t6, 1 + addiw t5, t6, 1 label931: - sraiw t6, t4, 1 - addiw a7, a0, 3 + sraiw t6, t5, 1 + addiw a6, a0, 3 flw f14, 8(a1) - addw a6, t6, a7 - addw t6, t3, a0 - fcvt.s.w f12, a6 - mulw a6, t5, t6 + addw t5, t3, a0 + addw a7, t6, a6 + mulw t6, t4, t5 + fcvt.s.w f12, a7 + mv t4, t6 fdiv.s f13, f14, f12 - mv t4, a6 - fadd.s f10, f11, f13 - bge a6, zero, label933 - addiw t4, a6, 1 + fadd.s f11, f10, f13 + bge t6, zero, label933 + addiw t4, t6, 1 label933: sraiw t5, t4, 1 addiw a0, a0, 4 - flw f14, 12(a1) + flw f13, 12(a1) addw t6, t5, a0 lui t5, 24 fcvt.s.w f12, t6 addiw t4, t5, 1693 - fdiv.s f13, f14, f12 - fadd.s f11, f10, f13 + fdiv.s f14, f13, f12 + fadd.s f10, f11, f14 bge a0, t4, label338 addi a1, a1, 16 j label139 -label267: - fmv.w.x f11, zero +.p2align 2 +label338: + fmv.s f11, f10 +.p2align 2 label128: lui t1, 24 addiw a1, t1, 1696 bge a0, a1, label272 sh2add a1, a0, a3 +.p2align 2 +label133: + addw t2, a5, a0 + addw t3, t0, a0 + mulw t4, t2, t3 + mv t1, t4 + bge t4, zero, label923 + addiw t1, t4, 1 +label923: + sraiw t2, t1, 1 + addiw a0, a0, 1 + flw f13, 0(a1) + addw t3, t2, a0 + lui t2, 24 + fcvt.s.w f10, t3 + addiw t1, t2, 1696 + fdiv.s f12, f13, f10 + fadd.s f11, f11, f12 + bge a0, t1, label288 + addi a1, a1, 4 j label133 -label272: - lui t0, 24 - fmv.s f10, f11 - addiw a0, t0, 1696 -label144: - fsw f10, 0(a2) - lui t0, 24 - addiw a1, t0, 1696 - bge a5, a1, label343 - addi a2, a2, 4 - mv t0, a5 - j label122 -label343: - fmv.w.x f10, zero - mv a2, a3 - mv t0, zero - mv a0, zero -label149: - addiw a5, t0, 1 +.p2align 2 +label820: + fmv.s f11, f10 +.p2align 2 +label223: lui t1, 24 addiw a1, t1, 1696 - bge a0, a1, label171 - addiw a1, a0, 3 + bge a0, a1, label757 + sh2add a1, a0, s0 + j label228 +.p2align 2 +label232: + addi a1, a1, 4 +.p2align 2 +label228: + addw t2, t0, a0 + addw t4, a5, a0 + mulw t3, t2, t4 + mv t1, t3 + bge t3, zero, label978 + addiw t1, t3, 1 +label978: + sraiw t2, t1, 1 + addiw a0, a0, 1 + flw f13, 0(a1) + addw t3, a5, t2 lui t2, 24 + fcvt.s.w f10, t3 addiw t1, t2, 1696 - bge a1, t1, label355 - sh2add a1, a0, s1 - addiw t1, t0, 2 - addiw t2, t0, 3 - addiw t3, t0, 4 -label156: - addw t6, t0, a0 - addw t5, a5, a0 - mulw a6, t6, t5 - mv t4, a6 - bge a6, zero, label939 - addiw t4, a6, 1 -label939: - sraiw a6, t4, 1 - flw f13, 0(a1) - addw t4, t1, a0 - addw t6, a5, a6 - fcvt.s.w f12, t6 - mulw t6, t5, t4 - fdiv.s f14, f13, f12 - mv t5, t6 - fadd.s f11, f10, f14 - bge t6, zero, label941 - addiw t5, t6, 1 -label941: - sraiw a6, t5, 1 - flw f14, 4(a1) - addw t5, t2, a0 - addw t6, a5, a6 - fcvt.s.w f12, t6 - mulw t6, t4, t5 - fdiv.s f13, f14, f12 - mv t4, t6 - fadd.s f10, f11, f13 - bge t6, zero, label943 - addiw t4, t6, 1 -label943: - sraiw a6, t4, 1 - flw f14, 8(a1) - addw t6, a5, a6 - addw a6, t3, a0 - fcvt.s.w f12, t6 - mulw t6, t5, a6 - fdiv.s f13, f14, f12 - mv t4, t6 - fadd.s f11, f10, f13 - bge t6, zero, label945 - addiw t4, t6, 1 -label945: - sraiw t5, t4, 1 - addiw a0, a0, 4 - flw f12, 12(a1) - addw t6, a5, t5 - lui t5, 24 - fcvt.s.w f10, t6 - addiw t4, t5, 1693 - fdiv.s f13, f12, f10 - fadd.s f11, f11, f13 - bge a0, t4, label402 - addi a1, a1, 16 - fmv.s f10, f11 - j label156 -label137: - addi a1, a1, 4 -label133: - addw t2, t0, a0 - addw t3, a5, a0 - mulw t4, t2, t3 - mv t1, t4 - bge t4, zero, label923 - addiw t1, t4, 1 -label923: - sraiw t2, t1, 1 - addiw a0, a0, 1 - flw f13, 0(a1) - addw t3, t2, a0 - lui t2, 24 - fcvt.s.w f11, t3 - addiw t1, t2, 1696 - fdiv.s f12, f13, f11 - fadd.s f10, f10, f12 - blt a0, t1, label137 + fdiv.s f12, f13, f10 + fadd.s f11, f11, f12 + blt a0, t1, label232 lui a1, 24 - addiw a0, a1, 1696 - j label144 -label402: - fmv.s f10, f11 -label161: - lui t1, 24 - addiw a1, t1, 1696 - bge a0, a1, label407 - sh2add a1, a0, s1 -label166: - addw t2, t0, a0 - addw t3, a5, a0 - mulw t4, t2, t3 - mv t1, t4 - bge t4, zero, label950 - addiw t1, t4, 1 -label950: - sraiw t2, t1, 1 - addiw a0, a0, 1 - flw f13, 0(a1) - addw t3, a5, t2 - lui t2, 24 - fcvt.s.w f11, t3 - addiw t1, t2, 1696 - fdiv.s f12, f13, f11 - fadd.s f10, f10, f12 - bge a0, t1, label423 - addi a1, a1, 4 - j label166 -label407: - lui t0, 24 fmv.s f10, f11 - addiw a0, t0, 1696 -label171: - fsw f10, 0(a2) - lui t0, 24 - addiw a1, t0, 1696 - bge a5, a1, label174 - addi a2, a2, 4 - mv t0, a5 - j label149 -label174: - fmv.w.x f10, zero - mv a2, s0 - mv a5, zero - mv a0, zero -label175: - addiw t0, a5, 1 - lui t1, 24 - addiw a1, t1, 1696 - bge a0, a1, label197 - addiw a1, a0, 3 - lui t2, 24 - addiw t1, t2, 1696 - bge a1, t1, label439 - sh2add a1, a0, a3 - addiw t1, a5, 2 - addiw t2, a5, 3 - addiw t3, a5, 4 -label182: - addw t6, a5, a0 - addw t4, t0, a0 - mulw a6, t6, t4 - mv t5, a6 - bge a6, zero, label957 - addiw t5, a6, 1 -label957: - sraiw t6, t5, 1 - addiw a6, a0, 1 - flw f12, 0(a1) - addw t5, t1, a0 - addw a7, t6, a6 - mulw a6, t4, t5 - fcvt.s.w f11, a7 - mv t6, a6 - fdiv.s f13, f12, f11 - fadd.s f10, f10, f13 - bge a6, zero, label959 - addiw t6, a6, 1 -label959: - sraiw t4, t6, 1 - addiw a6, a0, 2 - flw f13, 4(a1) - addw a7, t4, a6 - addw t4, t2, a0 - fcvt.s.w f12, a7 - mulw t6, t5, t4 - fdiv.s f14, f13, f12 - mv t5, t6 - fadd.s f11, f10, f14 - bge t6, zero, label961 - addiw t5, t6, 1 -label961: - sraiw t6, t5, 1 - addiw a6, a0, 3 - flw f14, 8(a1) - addw t5, t3, a0 - addw a7, t6, a6 - mulw a6, t4, t5 - fcvt.s.w f12, a7 - mv t6, a6 - fdiv.s f13, f14, f12 - fadd.s f10, f11, f13 - bge a6, zero, label963 - addiw t6, a6, 1 -label963: - sraiw t4, t6, 1 - addiw a0, a0, 4 - flw f13, 12(a1) - addw t5, t4, a0 - fcvt.s.w f11, t5 - lui t5, 24 - fdiv.s f12, f13, f11 - addiw t4, t5, 1693 - fadd.s f10, f10, f12 - blt a0, t4, label186 - fmv.s f11, f10 - j label187 -label197: - fsw f10, 0(a2) - lui a5, 24 - addiw a1, a5, 1696 - blt t0, a1, label200 - addiw a4, a4, 1 - li a0, 1000 - blt a4, a0, label111 + addiw a0, a1, 1696 + j label118 +label202: li a0, 76 jal _sysy_stoptime mv a0, zero fmv.w.x f10, zero + j label203 +.p2align 2 +label207: + addi s0, s0, 64 + fmv.s f10, f11 +.p2align 2 label203: sh2add a1, a0, s1 flw f13, 0(s0) @@ -433,63 +319,50 @@ label203: flw f15, 8(a1) fmadd.s f10, f12, f14, f11 flw f14, 12(s0) - flw f1, 12(a1) + flw f0, 12(a1) flw f12, 16(s0) - flw f0, 16(a1) fmadd.s f11, f13, f15, f10 - flw f13, 20(s0) + flw f13, 16(a1) + fmadd.s f10, f14, f0, f11 + flw f14, 20(s0) flw f15, 20(a1) - fmadd.s f10, f14, f1, f11 - flw f14, 24(s0) - fmadd.s f11, f12, f0, f10 + fmadd.s f11, f12, f13, f10 + flw f13, 24(s0) flw f0, 24(a1) - fmadd.s f10, f13, f15, f11 - flw f13, 28(s0) + flw f12, 28(s0) + fmadd.s f10, f14, f15, f11 flw f15, 28(a1) - fmadd.s f12, f14, f0, f10 flw f14, 32(s0) + fmadd.s f11, f13, f0, f10 flw f0, 32(a1) - fmadd.s f11, f13, f15, f12 flw f13, 36(s0) + fmadd.s f10, f12, f15, f11 flw f15, 36(a1) - flw f12, 40(s0) - flw f1, 40(a1) - fmadd.s f10, f14, f0, f11 - flw f14, 44(s0) - flw f0, 44(a1) - fmadd.s f11, f13, f15, f10 - flw f13, 48(s0) - flw f15, 48(a1) - fmadd.s f10, f12, f1, f11 fmadd.s f11, f14, f0, f10 - flw f14, 52(s0) - flw f0, 52(a1) + flw f14, 40(s0) + flw f0, 40(a1) fmadd.s f12, f13, f15, f11 - flw f11, 56(s0) - flw f15, 56(a1) - flw f13, 60(s0) + flw f11, 44(s0) + flw f13, 44(a1) fmadd.s f10, f14, f0, f12 - flw f14, 60(a1) + flw f14, 48(s0) + flw f15, 48(a1) + fmadd.s f12, f11, f13, f10 + flw f13, 52(s0) + flw f1, 52(a1) + fmadd.s f11, f14, f15, f12 + flw f14, 56(s0) + flw f0, 56(a1) + fmadd.s f10, f13, f1, f11 + flw f13, 60(s0) + flw f15, 60(a1) addiw a1, a2, 1696 - fmadd.s f12, f11, f15, f10 - fmadd.s f11, f13, f14, f12 + fmadd.s f12, f14, f0, f10 + fmadd.s f11, f13, f15, f12 blt a0, a1, label207 fmv.w.x f10, zero mv a0, zero - j label209 -label439: - fmv.w.x f12, zero - fmv.s f11, f10 - fmv.s f10, f12 -label187: - lui t1, 24 - addiw a1, t1, 1696 - blt a0, a1, label191 - lui a5, 24 - addiw a0, a5, 1696 - j label197 -label219: - addi s1, s1, 64 +.p2align 2 label209: flw f14, 0(s1) addiw a0, a0, 16 @@ -526,54 +399,58 @@ label209: flw f14, 60(s1) fmadd.s f13, f15, f15, f12 fmadd.s f10, f14, f14, f13 - blt a0, a1, label219 + bge a0, a1, label213 + addi s1, s1, 64 + j label209 +label213: fdiv.s f12, f11, f10 lui a0, 260096 -pcrel1016: +pcrel1024: auipc a2, %pcrel_hi(__cmmc_fp_constant_pool) fmv.w.x f13, a0 - addi a0, a2, %pcrel_lo(pcrel1016) - flw f11, 0(a0) + addi a0, a2, %pcrel_lo(pcrel1024) fsub.s f10, f13, f12 - flw f13, 4(a0) - flt.s a1, f11, f10 - flt.s a2, f10, f13 + flw f13, 0(a0) + flw f11, 4(a0) + flt.s a1, f13, f10 + flt.s a2, f10, f11 or a3, a1, a2 - bne a3, zero, label710 - lui a0, 260096 - fmv.w.x f11, a0 - j label217 -label710: + beq a3, zero, label709 lui a0, 260096 - fmv.s f10, f12 - fmv.w.x f11, a0 + fmv.s f11, f12 + fmv.w.x f10, a0 +.p2align 2 label214: - fadd.s f14, f11, f10 + fadd.s f14, f10, f11 lui a0, 258048 -pcrel1017: +pcrel1025: auipc a2, %pcrel_hi(__cmmc_fp_constant_pool) fmv.w.x f15, a0 - addi a0, a2, %pcrel_lo(pcrel1017) - fmul.s f11, f14, f15 + addi a0, a2, %pcrel_lo(pcrel1025) + fmul.s f10, f14, f15 flw f14, 0(a0) flw f15, 4(a0) - fdiv.s f10, f12, f11 - fsub.s f13, f11, f10 + fdiv.s f11, f12, f10 + fsub.s f13, f10, f11 flt.s a2, f13, f15 flt.s a1, f14, f13 or a3, a1, a2 bne a3, zero, label214 + j label217 +label709: + lui a0, 260096 + fmv.w.x f10, a0 label217: lui a0, 260096 -pcrel1018: +pcrel1026: auipc a3, %pcrel_hi(__cmmc_fp_constant_pool) fmv.w.x f12, a0 - addi a1, a3, %pcrel_lo(pcrel1018) - fsub.s f10, f11, f12 + addi a1, a3, %pcrel_lo(pcrel1026) + fsub.s f11, f10, f12 flw f12, 0(a1) - flw f11, 4(a1) - fle.s a2, f10, f12 - fle.s a3, f11, f10 + flw f10, 4(a1) + fle.s a2, f11, f12 + fle.s a3, f10, f11 and a0, a2, a3 jal putint li a0, 10 @@ -584,128 +461,305 @@ pcrel1018: ld s1, 16(sp) addi sp, sp, 24 ret -label191: - sh2add a1, a0, a3 -label192: - addw t2, a5, a0 - addw t3, t0, a0 - mulw t4, t2, t3 - mv t1, t4 - bge t4, zero, label968 - addiw t1, t4, 1 -label968: - sraiw t2, t1, 1 - addiw a0, a0, 1 - flw f13, 0(a1) - addw t3, t2, a0 +.p2align 2 +label343: + fmv.w.x f10, zero + mv a2, a3 + mv a5, zero + mv a0, zero +.p2align 2 +label149: + addiw t0, a5, 1 + lui t1, 24 + addiw a1, t1, 1696 + bge a0, a1, label171 + addiw a1, a0, 3 lui t2, 24 - fcvt.s.w f10, t3 addiw t1, t2, 1696 - fdiv.s f12, f13, f10 - fadd.s f11, f11, f12 - blt a0, t1, label196 - lui a1, 24 - fmv.s f10, f11 - addiw a0, a1, 1696 - j label197 -label423: - lui a1, 24 - addiw a0, a1, 1696 - j label171 -label757: - lui t0, 24 - fmv.s f10, f11 - addiw a0, t0, 1696 - j label118 -label207: - addi s0, s0, 64 - fmv.s f10, f11 - j label203 -label196: - addi a1, a1, 4 - j label192 -label200: + bge a1, t1, label355 + sh2add a1, a0, s1 + addiw t1, a5, 2 + addiw t3, a5, 3 + addiw t2, a5, 4 + j label156 +.p2align 2 +label171: + fsw f10, 0(a2) + lui a5, 24 + addiw a1, a5, 1696 + bge t0, a1, label174 addi a2, a2, 4 mv a5, t0 - j label175 -label186: - addi a1, a1, 16 - j label182 -label232: - addi a1, a1, 4 - j label228 -label338: - fmv.s f10, f11 - j label128 -label233: - sh2add a1, a0, s0 - addiw t1, t0, 2 - addiw t2, t0, 3 - addiw t3, t0, 4 - j label234 -label238: - addi a1, a1, 16 - fmv.s f10, f11 -label234: - addw t6, t0, a0 - addw t4, a5, a0 - mulw a6, t6, t4 - mv t5, a6 - bge a6, zero, label982 - addiw t5, a6, 1 -label982: - sraiw t6, t5, 1 - flw f12, 0(a1) - addw a6, a5, t6 - addw t6, t1, a0 - fcvt.s.w f11, a6 - mulw t5, t4, t6 - fdiv.s f13, f12, f11 - mv t4, t5 - fadd.s f10, f10, f13 - bge t5, zero, label984 - addiw t4, t5, 1 -label984: - sraiw a6, t4, 1 - flw f14, 4(a1) - addw t4, t2, a0 - addw t5, a5, a6 - mulw a6, t6, t4 - fcvt.s.w f12, t5 - mv t5, a6 - fdiv.s f13, f14, f12 - fadd.s f11, f10, f13 - bge a6, zero, label986 - addiw t5, a6, 1 -label986: + j label149 +.p2align 2 +label939: sraiw t6, t5, 1 - flw f13, 8(a1) - addw a6, a5, t6 + flw f13, 0(a1) + addw t5, t1, a0 + addw a6, t0, t6 + mulw t6, t4, t5 fcvt.s.w f12, a6 - addw a6, t3, a0 + mv t4, t6 fdiv.s f14, f13, f12 + fadd.s f11, f10, f14 + bge t6, zero, label941 + addiw t4, t6, 1 +label941: + sraiw a6, t4, 1 + flw f13, 4(a1) + addw t4, t3, a0 + addw t6, t0, a6 + fcvt.s.w f12, t6 + mulw t6, t5, t4 + fdiv.s f14, f13, f12 + mv a6, t6 + fadd.s f10, f11, f14 + bge t6, zero, label943 + addiw a6, t6, 1 +label943: + sraiw t6, a6, 1 + flw f14, 8(a1) + addw a6, t2, a0 + addw t5, t0, t6 mulw t6, t4, a6 + fcvt.s.w f12, t5 mv t5, t6 - fadd.s f10, f11, f14 - bge t6, zero, label988 + fdiv.s f13, f14, f12 + fadd.s f11, f10, f13 + bge t6, zero, label945 addiw t5, t6, 1 -label988: +label945: sraiw t4, t5, 1 addiw a0, a0, 4 flw f13, 12(a1) lui t5, 24 - addw t6, a5, t4 + addw t6, t0, t4 addiw t4, t5, 1693 fcvt.s.w f12, t6 fdiv.s f14, f13, f12 + fadd.s f10, f11, f14 + bge a0, t4, label402 + addi a1, a1, 16 +.p2align 2 +label156: + addw t6, a5, a0 + addw t4, t0, a0 + mulw a6, t6, t4 + mv t5, a6 + bge a6, zero, label939 + addiw t5, a6, 1 + j label939 +.p2align 2 +label355: + fmv.w.x f12, zero + fmv.s f11, f10 + fmv.s f10, f12 +.p2align 2 +label161: + lui t1, 24 + addiw a1, t1, 1696 + bge a0, a1, label407 + sh2add a1, a0, s1 + fmv.s f10, f11 +.p2align 2 +label166: + addw t2, a5, a0 + addw t3, t0, a0 + mulw t4, t2, t3 + mv t1, t4 + bge t4, zero, label950 + addiw t1, t4, 1 +label950: + sraiw t2, t1, 1 + addiw a0, a0, 1 + flw f12, 0(a1) + addw t3, t0, t2 + lui t2, 24 + fcvt.s.w f11, t3 + addiw t1, t2, 1696 + fdiv.s f13, f12, f11 + fadd.s f10, f10, f13 + bge a0, t1, label423 + addi a1, a1, 4 + j label166 +.p2align 2 +label174: + fmv.w.x f10, zero + mv a2, s0 + mv t0, zero + mv a0, zero + j label175 +.p2align 2 +label494: + lui t0, 24 + addiw a0, t0, 1696 +.p2align 2 +label197: + fsw f10, 0(a2) + lui t0, 24 + addiw a1, t0, 1696 + bge a5, a1, label201 + addi a2, a2, 4 + mv t0, a5 +.p2align 2 +label175: + addiw a5, t0, 1 + lui t1, 24 + addiw a1, t1, 1696 + bge a0, a1, label197 + addiw a1, a0, 3 + lui t2, 24 + addiw t1, t2, 1696 + bge a1, t1, label439 + sh2add a1, a0, a3 + addiw t1, t0, 2 + addiw t2, t0, 3 + addiw t3, t0, 4 +.p2align 2 +label182: + addw t6, t0, a0 + addw t5, a5, a0 + mulw a6, t6, t5 + mv t4, a6 + bge a6, zero, label957 + addiw t4, a6, 1 +label957: + sraiw t6, t4, 1 + addiw a7, a0, 1 + flw f13, 0(a1) + addw t4, t1, a0 + addw a6, t6, a7 + mulw t6, t5, t4 + fcvt.s.w f11, a6 + mv t5, t6 + fdiv.s f12, f13, f11 + fadd.s f10, f10, f12 + bge t6, zero, label959 + addiw t5, t6, 1 +label959: + sraiw t6, t5, 1 + addiw a6, a0, 2 + flw f13, 4(a1) + addw t5, t2, a0 + addw a7, t6, a6 + mulw a6, t4, t5 + fcvt.s.w f12, a7 + mv t6, a6 + fdiv.s f14, f13, f12 fadd.s f11, f10, f14 - blt a0, t4, label238 + bge a6, zero, label961 + addiw t6, a6, 1 +label961: + sraiw t4, t6, 1 + addiw a6, a0, 3 + flw f14, 8(a1) + addw t6, t3, a0 + addw a7, t4, a6 + mulw a6, t5, t6 + fcvt.s.w f12, a7 + mv t4, a6 + fdiv.s f13, f14, f12 + fadd.s f10, f11, f13 + bge a6, zero, label963 + addiw t4, a6, 1 +label963: + sraiw t5, t4, 1 + addiw a0, a0, 4 + flw f12, 12(a1) + addw t6, t5, a0 + lui t5, 24 + fcvt.s.w f11, t6 + addiw t4, t5, 1693 + fdiv.s f13, f12, f11 + fadd.s f10, f10, f13 + bge a0, t4, label489 + addi a1, a1, 16 + j label182 +.p2align 2 +label489: + fmv.s f11, f10 +.p2align 2 +label187: + lui t1, 24 + addiw a1, t1, 1696 + bge a0, a1, label494 + sh2add a1, a0, a3 fmv.s f10, f11 - j label223 -label355: - fmv.w.x f11, zero +.p2align 2 +label192: + addw t2, t0, a0 + addw t3, a5, a0 + mulw t4, t2, t3 + mv t1, t4 + bge t4, zero, label968 + addiw t1, t4, 1 +label968: + sraiw t2, t1, 1 + addiw a0, a0, 1 + flw f13, 0(a1) + addw t3, t2, a0 + lui t2, 24 + fcvt.s.w f11, t3 + addiw t1, t2, 1696 + fdiv.s f12, f13, f11 + fadd.s f10, f10, f12 + bge a0, t1, label510 + addi a1, a1, 4 + j label192 +.p2align 2 +label423: + lui a1, 24 + addiw a0, a1, 1696 + j label171 +.p2align 2 +label757: + lui t0, 24 + addiw a0, t0, 1696 + j label118 +.p2align 2 +label402: + fmv.s f11, f10 j label161 .p2align 2 +label510: + lui a1, 24 + addiw a0, a1, 1696 + j label197 +.p2align 2 +label439: + fmv.w.x f12, zero + fmv.s f11, f10 + fmv.s f10, f12 + j label187 +.p2align 2 +label752: + fmv.w.x f12, zero + fmv.s f11, f10 + fmv.s f10, f12 + j label223 +.p2align 2 +label407: + lui a5, 24 + addiw a0, a5, 1696 + j label171 +.p2align 2 +label272: + lui a5, 24 + addiw a0, a5, 1696 + j label144 +.p2align 2 +label267: + fmv.w.x f12, zero + fmv.s f11, f10 + fmv.s f10, f12 + j label128 +.p2align 2 +label288: + lui a1, 24 + fmv.s f10, f11 + addiw a0, a1, 1696 + j label144 +.p2align 2 cmmc_parallel_body_0: mv a3, a0 addiw a5, a0, 3 diff --git a/tests/SysY2022/performance/vector_mul2.riscv.s b/tests/SysY2022/performance/vector_mul2.riscv.s index 4fdceeca4..3e570f0f9 100644 --- a/tests/SysY2022/performance/vector_mul2.riscv.s +++ b/tests/SysY2022/performance/vector_mul2.riscv.s @@ -29,48 +29,45 @@ main: sd s1, 16(sp) jal _sysy_starttime lui a3, 24 -pcrel1011: +pcrel1015: auipc a0, %pcrel_hi(cmmc_parallel_body_payload_0) -pcrel1012: +pcrel1016: auipc a1, %pcrel_hi(vectorA) - addi s0, a1, %pcrel_lo(pcrel1012) -pcrel1013: + addi s0, a1, %pcrel_lo(pcrel1016) +pcrel1017: auipc a1, %pcrel_hi(cmmc_parallel_body_0) - sd s0, %pcrel_lo(pcrel1011)(a0) - addi a2, a1, %pcrel_lo(pcrel1013) + sd s0, %pcrel_lo(pcrel1015)(a0) + addi a2, a1, %pcrel_lo(pcrel1017) mv a0, zero addiw a1, a3, 1696 jal cmmcParallelFor -pcrel1014: +pcrel1018: auipc a1, %pcrel_hi(Vectortm) mv a4, zero -pcrel1015: +pcrel1019: auipc a0, %pcrel_hi(vectorB) - addi a3, a1, %pcrel_lo(pcrel1014) - addi s1, a0, %pcrel_lo(pcrel1015) + addi a2, a1, %pcrel_lo(pcrel1018) + addi s1, a0, %pcrel_lo(pcrel1019) + j label111 +.p2align 2 +label201: + addiw a4, a4, 1 + li a0, 1000 + bge a4, a0, label202 +.p2align 2 label111: fmv.w.x f10, zero - mv a2, a3 - mv t0, zero + mv a1, a2 + mv a5, zero mv a0, zero -label113: - addiw a5, t0, 1 - lui t1, 24 - addiw a1, t1, 1696 - bge a0, a1, label118 - addiw a1, a0, 3 - lui t2, 24 - addiw t1, t2, 1696 - blt a1, t1, label233 - fmv.w.x f11, zero -label223: - lui t1, 24 - addiw a1, t1, 1696 - bge a0, a1, label757 - sh2add a1, a0, s0 + j label113 +.p2align 2 +label232: + addi a3, a3, 4 +.p2align 2 label228: - addw t2, t0, a0 - addw t4, a5, a0 + addw t2, a5, a0 + addw t4, t0, a0 mulw t3, t2, t4 mv t1, t3 bge t3, zero, label978 @@ -78,199 +75,184 @@ label228: label978: sraiw t2, t1, 1 addiw a0, a0, 1 - flw f13, 0(a1) - addw t3, a5, t2 + flw f13, 0(a3) + addw t3, t0, t2 lui t2, 24 fcvt.s.w f11, t3 addiw t1, t2, 1696 fdiv.s f12, f13, f11 fadd.s f10, f10, f12 blt a0, t1, label232 - lui a1, 24 - addiw a0, a1, 1696 + lui a3, 24 + addiw a0, a3, 1696 +.p2align 2 label118: - fsw f10, 0(a2) - lui t0, 24 - addiw a1, t0, 1696 - bge a5, a1, label121 - addi a2, a2, 4 - mv t0, a5 - j label113 -label121: - fmv.w.x f10, zero - mv a2, s1 - mv t0, zero - mv a0, zero -label122: - addiw a5, t0, 1 + fsw f10, 0(a1) + lui a5, 24 + addiw a3, a5, 1696 + bge t0, a3, label121 + addi a1, a1, 4 + mv a5, t0 +.p2align 2 +label113: + addiw t0, a5, 1 lui t1, 24 - addiw a1, t1, 1696 - bge a0, a1, label144 - addiw a1, a0, 3 + addiw a3, t1, 1696 + bge a0, a3, label118 + addiw a3, a0, 3 lui t2, 24 addiw t1, t2, 1696 - bge a1, t1, label267 - sh2add a1, a0, a3 - addiw t1, t0, 2 - addiw t2, t0, 3 - addiw t3, t0, 4 + bge a3, t1, label752 + sh2add a3, a0, s0 + addiw t1, a5, 2 + addiw t2, a5, 3 + addiw t3, a5, 4 fmv.s f11, f10 -label139: - addw t5, t0, a0 + j label234 +.p2align 2 +label752: + fmv.w.x f11, zero +.p2align 2 +label223: + lui t1, 24 + addiw a3, t1, 1696 + bge a0, a3, label757 + sh2add a3, a0, s0 + j label228 +.p2align 2 +label238: + addi a3, a3, 16 +.p2align 2 +label234: addw t6, a5, a0 - mulw a6, t5, t6 - mv t4, a6 - bge a6, zero, label927 - addiw t4, a6, 1 -label927: - sraiw t5, t4, 1 - addiw a6, a0, 1 - flw f13, 0(a1) - addw t4, t1, a0 - addw a7, t5, a6 + addw t4, t0, a0 mulw a6, t6, t4 - fcvt.s.w f12, a7 mv t5, a6 - fdiv.s f14, f13, f12 - fadd.s f10, f11, f14 - bge a6, zero, label929 + bge a6, zero, label982 addiw t5, a6, 1 -label929: +label982: sraiw t6, t5, 1 - addiw a7, a0, 2 - flw f14, 4(a1) - addw t5, t2, a0 - addw a6, t6, a7 + flw f13, 0(a3) + addw t5, t1, a0 + addw a6, t0, t6 mulw t6, t4, t5 fcvt.s.w f12, a6 mv t4, t6 + fdiv.s f14, f13, f12 + fadd.s f10, f11, f14 + bge t6, zero, label984 + addiw t4, t6, 1 +label984: + sraiw a6, t4, 1 + flw f14, 4(a3) + addw t4, t2, a0 + addw t6, t0, a6 + fcvt.s.w f12, t6 + mulw t6, t5, t4 fdiv.s f13, f14, f12 + mv t5, t6 fadd.s f11, f10, f13 - bge t6, zero, label931 - addiw t4, t6, 1 -label931: - sraiw t6, t4, 1 - addiw a7, a0, 3 - flw f14, 8(a1) - addw a6, t6, a7 + bge t6, zero, label986 + addiw t5, t6, 1 +label986: + sraiw t6, t5, 1 + flw f13, 8(a3) + addw a6, t0, t6 addw t6, t3, a0 fcvt.s.w f12, a6 - mulw a6, t5, t6 - fdiv.s f13, f14, f12 - mv t4, a6 - fadd.s f10, f11, f13 - bge a6, zero, label933 - addiw t4, a6, 1 -label933: + mulw t5, t4, t6 + fdiv.s f14, f13, f12 + mv t4, t5 + fadd.s f10, f11, f14 + bge t5, zero, label988 + addiw t4, t5, 1 +label988: sraiw t5, t4, 1 addiw a0, a0, 4 - flw f14, 12(a1) - addw t6, t5, a0 + flw f13, 12(a3) + addw t6, t0, t5 lui t5, 24 fcvt.s.w f12, t6 addiw t4, t5, 1693 - fdiv.s f13, f14, f12 - fadd.s f11, f10, f13 - bge a0, t4, label338 - addi a1, a1, 16 - j label139 -label267: - fmv.w.x f11, zero -label128: - lui t1, 24 - addiw a1, t1, 1696 - bge a0, a1, label272 - sh2add a1, a0, a3 - j label133 -label272: - lui t0, 24 + fdiv.s f14, f13, f12 + fadd.s f11, f10, f14 + blt a0, t4, label238 fmv.s f10, f11 - addiw a0, t0, 1696 -label144: - fsw f10, 0(a2) - lui t0, 24 - addiw a1, t0, 1696 - bge a5, a1, label343 - addi a2, a2, 4 - mv t0, a5 - j label122 -label343: - fmv.w.x f10, zero - mv a2, a3 - mv t0, zero + j label223 +label202: + li a0, 76 + jal _sysy_stoptime mv a0, zero -label149: - addiw a5, t0, 1 - lui t1, 24 - addiw a1, t1, 1696 - bge a0, a1, label171 - addiw a1, a0, 3 - lui t2, 24 - addiw t1, t2, 1696 - bge a1, t1, label355 + fmv.w.x f10, zero +.p2align 2 +label203: sh2add a1, a0, s1 - addiw t1, t0, 2 - addiw t2, t0, 3 - addiw t3, t0, 4 -label156: - addw t6, t0, a0 - addw t5, a5, a0 - mulw a6, t6, t5 - mv t4, a6 - bge a6, zero, label939 - addiw t4, a6, 1 -label939: - sraiw a6, t4, 1 - flw f13, 0(a1) - addw t4, t1, a0 - addw t6, a5, a6 - fcvt.s.w f12, t6 - mulw t6, t5, t4 - fdiv.s f14, f13, f12 - mv t5, t6 - fadd.s f11, f10, f14 - bge t6, zero, label941 - addiw t5, t6, 1 -label941: - sraiw a6, t5, 1 + flw f13, 0(s0) + lui a2, 24 + addiw a0, a0, 16 + flw f15, 0(a1) + flw f12, 4(s0) + fmadd.s f11, f13, f15, f10 flw f14, 4(a1) - addw t5, t2, a0 - addw t6, a5, a6 - fcvt.s.w f12, t6 - mulw t6, t4, t5 - fdiv.s f13, f14, f12 - mv t4, t6 - fadd.s f10, f11, f13 - bge t6, zero, label943 - addiw t4, t6, 1 -label943: - sraiw a6, t4, 1 - flw f14, 8(a1) - addw t6, a5, a6 - addw a6, t3, a0 - fcvt.s.w f12, t6 - mulw t6, t5, a6 - fdiv.s f13, f14, f12 - mv t4, t6 - fadd.s f11, f10, f13 - bge t6, zero, label945 - addiw t4, t6, 1 -label945: - sraiw t5, t4, 1 - addiw a0, a0, 4 - flw f12, 12(a1) - addw t6, a5, t5 - lui t5, 24 - fcvt.s.w f10, t6 - addiw t4, t5, 1693 - fdiv.s f13, f12, f10 - fadd.s f11, f11, f13 - bge a0, t4, label402 - addi a1, a1, 16 + flw f13, 8(s0) + flw f15, 8(a1) + fmadd.s f10, f12, f14, f11 + flw f14, 12(s0) + flw f0, 12(a1) + flw f12, 16(s0) + fmadd.s f11, f13, f15, f10 + flw f13, 16(a1) + fmadd.s f10, f14, f0, f11 + flw f14, 20(s0) + flw f15, 20(a1) + fmadd.s f11, f12, f13, f10 + flw f13, 24(s0) + flw f0, 24(a1) + flw f12, 28(s0) + fmadd.s f10, f14, f15, f11 + flw f15, 28(a1) + flw f14, 32(s0) + fmadd.s f11, f13, f0, f10 + flw f0, 32(a1) + flw f13, 36(s0) + fmadd.s f10, f12, f15, f11 + flw f15, 36(a1) + fmadd.s f11, f14, f0, f10 + flw f14, 40(s0) + flw f0, 40(a1) + fmadd.s f12, f13, f15, f11 + flw f11, 44(s0) + flw f13, 44(a1) + fmadd.s f10, f14, f0, f12 + flw f14, 48(s0) + flw f0, 48(a1) + fmadd.s f12, f11, f13, f10 + flw f13, 52(s0) + flw f15, 52(a1) + fmadd.s f11, f14, f0, f12 + flw f14, 56(s0) + flw f0, 56(a1) + fmadd.s f10, f13, f15, f11 + flw f13, 60(s0) + flw f15, 60(a1) + addiw a1, a2, 1696 + fmadd.s f12, f14, f0, f10 + fmadd.s f11, f13, f15, f12 + bge a0, a1, label623 + addi s0, s0, 64 fmv.s f10, f11 - j label156 + j label203 +.p2align 2 +label121: + fmv.w.x f10, zero + mv a1, s1 + mv t0, zero + mv a0, zero + j label122 +.p2align 2 label137: - addi a1, a1, 4 + addi a3, a3, 4 +.p2align 2 label133: addw t2, t0, a0 addw t3, a5, a0 @@ -281,215 +263,119 @@ label133: label923: sraiw t2, t1, 1 addiw a0, a0, 1 - flw f13, 0(a1) + flw f13, 0(a3) addw t3, t2, a0 lui t2, 24 - fcvt.s.w f11, t3 + fcvt.s.w f10, t3 addiw t1, t2, 1696 - fdiv.s f12, f13, f11 - fadd.s f10, f10, f12 + fdiv.s f12, f13, f10 + fadd.s f11, f11, f12 blt a0, t1, label137 - lui a1, 24 - addiw a0, a1, 1696 - j label144 -label402: - fmv.s f10, f11 -label161: - lui t1, 24 - addiw a1, t1, 1696 - bge a0, a1, label407 - sh2add a1, a0, s1 -label166: - addw t2, t0, a0 - addw t3, a5, a0 - mulw t4, t2, t3 - mv t1, t4 - bge t4, zero, label950 - addiw t1, t4, 1 -label950: - sraiw t2, t1, 1 - addiw a0, a0, 1 - flw f13, 0(a1) - addw t3, a5, t2 - lui t2, 24 - fcvt.s.w f11, t3 - addiw t1, t2, 1696 - fdiv.s f12, f13, f11 - fadd.s f10, f10, f12 - bge a0, t1, label423 - addi a1, a1, 4 - j label166 -label407: - lui t0, 24 + lui a3, 24 fmv.s f10, f11 - addiw a0, t0, 1696 -label171: - fsw f10, 0(a2) + addiw a0, a3, 1696 +.p2align 2 +label144: + fsw f10, 0(a1) lui t0, 24 - addiw a1, t0, 1696 - bge a5, a1, label174 - addi a2, a2, 4 + addiw a3, t0, 1696 + bge a5, a3, label343 + addi a1, a1, 4 mv t0, a5 - j label149 -label174: - fmv.w.x f10, zero - mv a2, s0 - mv a5, zero - mv a0, zero -label175: - addiw t0, a5, 1 +.p2align 2 +label122: + addiw a5, t0, 1 lui t1, 24 - addiw a1, t1, 1696 - bge a0, a1, label197 - addiw a1, a0, 3 + addiw a3, t1, 1696 + bge a0, a3, label144 + addiw a3, a0, 3 lui t2, 24 addiw t1, t2, 1696 - bge a1, t1, label439 - sh2add a1, a0, a3 - addiw t1, a5, 2 - addiw t2, a5, 3 - addiw t3, a5, 4 -label182: - addw t6, a5, a0 - addw t4, t0, a0 + bge a3, t1, label267 + sh2add a3, a0, a2 + addiw t1, t0, 2 + addiw t2, t0, 3 + addiw t3, t0, 4 +.p2align 2 +label139: + addw t6, t0, a0 + addw t4, a5, a0 mulw a6, t6, t4 mv t5, a6 - bge a6, zero, label957 + bge a6, zero, label927 addiw t5, a6, 1 -label957: +label927: sraiw t6, t5, 1 addiw a6, a0, 1 - flw f12, 0(a1) + flw f13, 0(a3) addw t5, t1, a0 addw a7, t6, a6 mulw a6, t4, t5 - fcvt.s.w f11, a7 + fcvt.s.w f12, a7 mv t6, a6 - fdiv.s f13, f12, f11 - fadd.s f10, f10, f13 - bge a6, zero, label959 + fdiv.s f14, f13, f12 + fadd.s f11, f10, f14 + bge a6, zero, label929 addiw t6, a6, 1 -label959: +label929: sraiw t4, t6, 1 - addiw a6, a0, 2 - flw f13, 4(a1) - addw a7, t4, a6 + addiw a7, a0, 2 + flw f14, 4(a3) + addw a6, t4, a7 addw t4, t2, a0 - fcvt.s.w f12, a7 + fcvt.s.w f12, a6 mulw t6, t5, t4 - fdiv.s f14, f13, f12 + fdiv.s f13, f14, f12 mv t5, t6 - fadd.s f11, f10, f14 - bge t6, zero, label961 + fadd.s f10, f11, f13 + bge t6, zero, label931 addiw t5, t6, 1 -label961: +label931: sraiw t6, t5, 1 addiw a6, a0, 3 - flw f14, 8(a1) + flw f14, 8(a3) addw t5, t3, a0 addw a7, t6, a6 - mulw a6, t4, t5 + mulw t6, t4, t5 fcvt.s.w f12, a7 - mv t6, a6 + mv t4, t6 fdiv.s f13, f14, f12 - fadd.s f10, f11, f13 - bge a6, zero, label963 - addiw t6, a6, 1 -label963: - sraiw t4, t6, 1 + fadd.s f11, f10, f13 + bge t6, zero, label933 + addiw t4, t6, 1 +label933: + sraiw t5, t4, 1 addiw a0, a0, 4 - flw f13, 12(a1) - addw t5, t4, a0 - fcvt.s.w f11, t5 + flw f13, 12(a3) + addw t6, t5, a0 lui t5, 24 - fdiv.s f12, f13, f11 + fcvt.s.w f12, t6 addiw t4, t5, 1693 - fadd.s f10, f10, f12 - blt a0, t4, label186 - fmv.s f11, f10 - j label187 -label197: - fsw f10, 0(a2) - lui a5, 24 - addiw a1, a5, 1696 - blt t0, a1, label200 - addiw a4, a4, 1 - li a0, 1000 - blt a4, a0, label111 - li a0, 76 - jal _sysy_stoptime - mv a0, zero - fmv.w.x f10, zero -label203: - sh2add a1, a0, s1 - flw f13, 0(s0) - lui a2, 24 - addiw a0, a0, 16 - flw f15, 0(a1) - flw f12, 4(s0) - fmadd.s f11, f13, f15, f10 - flw f14, 4(a1) - flw f13, 8(s0) - flw f15, 8(a1) - fmadd.s f10, f12, f14, f11 - flw f14, 12(s0) - flw f1, 12(a1) - flw f12, 16(s0) - flw f0, 16(a1) - fmadd.s f11, f13, f15, f10 - flw f13, 20(s0) - flw f15, 20(a1) - fmadd.s f10, f14, f1, f11 - flw f14, 24(s0) - fmadd.s f11, f12, f0, f10 - flw f0, 24(a1) - fmadd.s f10, f13, f15, f11 - flw f13, 28(s0) - flw f15, 28(a1) - fmadd.s f12, f14, f0, f10 - flw f14, 32(s0) - flw f0, 32(a1) - fmadd.s f11, f13, f15, f12 - flw f13, 36(s0) - flw f15, 36(a1) - flw f12, 40(s0) - flw f1, 40(a1) - fmadd.s f10, f14, f0, f11 - flw f14, 44(s0) - flw f0, 44(a1) - fmadd.s f11, f13, f15, f10 - flw f13, 48(s0) - flw f15, 48(a1) - fmadd.s f10, f12, f1, f11 - fmadd.s f11, f14, f0, f10 - flw f14, 52(s0) - flw f0, 52(a1) - fmadd.s f12, f13, f15, f11 - flw f11, 56(s0) - flw f15, 56(a1) - flw f13, 60(s0) - fmadd.s f10, f14, f0, f12 - flw f14, 60(a1) - addiw a1, a2, 1696 - fmadd.s f12, f11, f15, f10 - fmadd.s f11, f13, f14, f12 - blt a0, a1, label207 - fmv.w.x f10, zero - mv a0, zero - j label209 -label439: + fdiv.s f14, f13, f12 + fadd.s f10, f11, f14 + bge a0, t4, label338 + addi a3, a3, 16 + j label139 +.p2align 2 +label267: fmv.w.x f12, zero fmv.s f11, f10 fmv.s f10, f12 -label187: +.p2align 2 +label128: lui t1, 24 - addiw a1, t1, 1696 - blt a0, a1, label191 - lui a5, 24 - addiw a0, a5, 1696 - j label197 + addiw a3, t1, 1696 + bge a0, a3, label272 + sh2add a3, a0, a2 + j label133 +label623: + fmv.w.x f10, zero + mv a0, zero + j label209 +.p2align 2 label219: addi s1, s1, 64 +.p2align 2 label209: flw f14, 0(s1) addiw a0, a0, 16 @@ -529,51 +415,48 @@ label209: blt a0, a1, label219 fdiv.s f12, f11, f10 lui a0, 260096 -pcrel1016: +pcrel1020: auipc a2, %pcrel_hi(__cmmc_fp_constant_pool) fmv.w.x f13, a0 - addi a0, a2, %pcrel_lo(pcrel1016) - flw f11, 0(a0) + addi a0, a2, %pcrel_lo(pcrel1020) fsub.s f10, f13, f12 - flw f13, 4(a0) - flt.s a1, f11, f10 - flt.s a2, f10, f13 + flw f13, 0(a0) + flw f11, 4(a0) + flt.s a1, f13, f10 + flt.s a2, f10, f11 or a3, a1, a2 - bne a3, zero, label710 + beq a3, zero, label709 lui a0, 260096 - fmv.w.x f11, a0 - j label217 -label710: - lui a0, 260096 - fmv.s f10, f12 - fmv.w.x f11, a0 + fmv.s f11, f12 + fmv.w.x f10, a0 +.p2align 2 label214: - fadd.s f14, f11, f10 + fadd.s f14, f10, f11 lui a0, 258048 -pcrel1017: +pcrel1021: auipc a2, %pcrel_hi(__cmmc_fp_constant_pool) fmv.w.x f15, a0 - addi a0, a2, %pcrel_lo(pcrel1017) - fmul.s f11, f14, f15 + addi a0, a2, %pcrel_lo(pcrel1021) + fmul.s f10, f14, f15 flw f14, 0(a0) flw f15, 4(a0) - fdiv.s f10, f12, f11 - fsub.s f13, f11, f10 + fdiv.s f11, f12, f10 + fsub.s f13, f10, f11 flt.s a2, f13, f15 flt.s a1, f14, f13 or a3, a1, a2 bne a3, zero, label214 label217: lui a0, 260096 -pcrel1018: +pcrel1022: auipc a3, %pcrel_hi(__cmmc_fp_constant_pool) fmv.w.x f12, a0 - addi a1, a3, %pcrel_lo(pcrel1018) - fsub.s f10, f11, f12 + addi a1, a3, %pcrel_lo(pcrel1022) + fsub.s f11, f10, f12 flw f12, 0(a1) - flw f11, 4(a1) - fle.s a2, f10, f12 - fle.s a3, f11, f10 + flw f10, 4(a1) + fle.s a2, f11, f12 + fle.s a3, f10, f11 and a0, a2, a3 jal putint li a0, 10 @@ -584,11 +467,129 @@ pcrel1018: ld s1, 16(sp) addi sp, sp, 24 ret -label191: - sh2add a1, a0, a3 +.p2align 2 +label757: + lui a5, 24 + fmv.s f10, f11 + addiw a0, a5, 1696 + j label118 +.p2align 2 +label272: + lui t0, 24 + addiw a0, t0, 1696 + j label144 +.p2align 2 +label338: + fmv.s f11, f10 + j label128 +label709: + lui a0, 260096 + fmv.w.x f10, a0 + j label217 +.p2align 2 +label343: + fmv.w.x f10, zero + mv a3, a2 + mv a5, zero + mv a0, zero + j label149 +.p2align 2 +label171: + fsw f10, 0(a3) + lui a5, 24 + addiw a1, a5, 1696 + bge t0, a1, label174 + addi a3, a3, 4 + mv a5, t0 +.p2align 2 +label149: + addiw t0, a5, 1 + lui t1, 24 + addiw a1, t1, 1696 + bge a0, a1, label171 + addiw a1, a0, 3 + lui t2, 24 + addiw t1, t2, 1696 + bge a1, t1, label355 + sh2add a1, a0, s1 + addiw t2, a5, 2 + addiw t3, a5, 3 + addiw t1, a5, 4 + j label156 +.p2align 2 +label160: + addi a1, a1, 16 +.p2align 2 +label156: + addw t6, a5, a0 + addw t4, t0, a0 + mulw a6, t6, t4 + mv t5, a6 + bge a6, zero, label939 + addiw t5, a6, 1 +label939: + sraiw t6, t5, 1 + flw f13, 0(a1) + addw t5, t2, a0 + addw a6, t0, t6 + mulw t6, t4, t5 + fcvt.s.w f12, a6 + mv t4, t6 + fdiv.s f14, f13, f12 + fadd.s f11, f10, f14 + bge t6, zero, label941 + addiw t4, t6, 1 +label941: + sraiw a6, t4, 1 + flw f13, 4(a1) + addw t4, t3, a0 + addw t6, t0, a6 + fcvt.s.w f12, t6 + mulw t6, t5, t4 + fdiv.s f14, f13, f12 + mv a6, t6 + fadd.s f10, f11, f14 + bge t6, zero, label943 + addiw a6, t6, 1 +label943: + sraiw t6, a6, 1 + flw f14, 8(a1) + addw a6, t1, a0 + addw t5, t0, t6 + mulw t6, t4, a6 + fcvt.s.w f12, t5 + mv t5, t6 + fdiv.s f13, f14, f12 + fadd.s f11, f10, f13 + bge t6, zero, label945 + addiw t5, t6, 1 +label945: + sraiw t4, t5, 1 + addiw a0, a0, 4 + flw f13, 12(a1) + lui t5, 24 + addw t6, t0, t4 + addiw t4, t5, 1693 + fcvt.s.w f12, t6 + fdiv.s f14, f13, f12 + fadd.s f10, f11, f14 + blt a0, t4, label160 + fmv.s f11, f10 + j label161 +.p2align 2 +label174: + fmv.w.x f10, zero + mv a3, s0 + mv t0, zero + mv a0, zero + j label175 +.p2align 2 +label196: + addi a1, a1, 4 +.p2align 2 label192: - addw t2, a5, a0 - addw t3, t0, a0 + addw t2, t0, a0 + addw t3, a5, a0 mulw t4, t2, t3 mv t1, t4 bge t4, zero, label968 @@ -599,112 +600,160 @@ label968: flw f13, 0(a1) addw t3, t2, a0 lui t2, 24 - fcvt.s.w f10, t3 + fcvt.s.w f11, t3 addiw t1, t2, 1696 - fdiv.s f12, f13, f10 - fadd.s f11, f11, f12 + fdiv.s f12, f13, f11 + fadd.s f10, f10, f12 blt a0, t1, label196 lui a1, 24 - fmv.s f10, f11 addiw a0, a1, 1696 - j label197 -label423: - lui a1, 24 - addiw a0, a1, 1696 - j label171 -label757: +.p2align 2 +label197: + fsw f10, 0(a3) lui t0, 24 - fmv.s f10, f11 - addiw a0, t0, 1696 - j label118 -label207: - addi s0, s0, 64 - fmv.s f10, f11 - j label203 -label196: - addi a1, a1, 4 - j label192 -label200: - addi a2, a2, 4 - mv a5, t0 - j label175 -label186: - addi a1, a1, 16 - j label182 -label232: - addi a1, a1, 4 - j label228 -label338: - fmv.s f10, f11 - j label128 -label233: - sh2add a1, a0, s0 + addiw a1, t0, 1696 + bge a5, a1, label201 + addi a3, a3, 4 + mv t0, a5 +.p2align 2 +label175: + addiw a5, t0, 1 + lui t1, 24 + addiw a1, t1, 1696 + bge a0, a1, label197 + addiw a1, a0, 3 + lui t2, 24 + addiw t1, t2, 1696 + bge a1, t1, label439 + sh2add a1, a0, a2 addiw t1, t0, 2 addiw t2, t0, 3 addiw t3, t0, 4 - j label234 -label238: - addi a1, a1, 16 - fmv.s f10, f11 -label234: +.p2align 2 +label182: addw t6, t0, a0 - addw t4, a5, a0 - mulw a6, t6, t4 - mv t5, a6 - bge a6, zero, label982 - addiw t5, a6, 1 -label982: - sraiw t6, t5, 1 - flw f12, 0(a1) - addw a6, a5, t6 - addw t6, t1, a0 + addw t5, a5, a0 + mulw a6, t6, t5 + mv t4, a6 + bge a6, zero, label957 + addiw t4, a6, 1 +label957: + sraiw t6, t4, 1 + addiw a7, a0, 1 + flw f13, 0(a1) + addw t4, t1, a0 + addw a6, t6, a7 + mulw t6, t5, t4 fcvt.s.w f11, a6 - mulw t5, t4, t6 - fdiv.s f13, f12, f11 - mv t4, t5 - fadd.s f10, f10, f13 - bge t5, zero, label984 - addiw t4, t5, 1 -label984: - sraiw a6, t4, 1 - flw f14, 4(a1) - addw t4, t2, a0 - addw t5, a5, a6 - mulw a6, t6, t4 - fcvt.s.w f12, t5 - mv t5, a6 - fdiv.s f13, f14, f12 - fadd.s f11, f10, f13 - bge a6, zero, label986 - addiw t5, a6, 1 -label986: - sraiw t6, t5, 1 - flw f13, 8(a1) - addw a6, a5, t6 - fcvt.s.w f12, a6 - addw a6, t3, a0 - fdiv.s f14, f13, f12 - mulw t6, t4, a6 mv t5, t6 - fadd.s f10, f11, f14 - bge t6, zero, label988 + fdiv.s f12, f13, f11 + fadd.s f10, f10, f12 + bge t6, zero, label959 addiw t5, t6, 1 -label988: - sraiw t4, t5, 1 +label959: + sraiw t6, t5, 1 + addiw a6, a0, 2 + flw f13, 4(a1) + addw t5, t2, a0 + addw a7, t6, a6 + mulw a6, t4, t5 + fcvt.s.w f12, a7 + mv t6, a6 + fdiv.s f14, f13, f12 + fadd.s f11, f10, f14 + bge a6, zero, label961 + addiw t6, a6, 1 +label961: + sraiw t4, t6, 1 + addiw a6, a0, 3 + flw f14, 8(a1) + addw t6, t3, a0 + addw a7, t4, a6 + mulw a6, t5, t6 + fcvt.s.w f12, a7 + mv t4, a6 + fdiv.s f13, f14, f12 + fadd.s f10, f11, f13 + bge a6, zero, label963 + addiw t4, a6, 1 +label963: + sraiw t5, t4, 1 addiw a0, a0, 4 - flw f13, 12(a1) + flw f12, 12(a1) + addw t6, t5, a0 lui t5, 24 - addw t6, a5, t4 + fcvt.s.w f11, t6 addiw t4, t5, 1693 - fcvt.s.w f12, t6 - fdiv.s f14, f13, f12 - fadd.s f11, f10, f14 - blt a0, t4, label238 + fdiv.s f13, f12, f11 + fadd.s f10, f10, f13 + bge a0, t4, label489 + addi a1, a1, 16 + j label182 +.p2align 2 +label489: + fmv.s f11, f10 +.p2align 2 +label187: + lui t1, 24 + addiw a1, t1, 1696 + bge a0, a1, label494 + sh2add a1, a0, a2 fmv.s f10, f11 - j label223 + j label192 +.p2align 2 label355: - fmv.w.x f11, zero - j label161 + fmv.w.x f12, zero + fmv.s f11, f10 + fmv.s f10, f12 +.p2align 2 +label161: + lui t1, 24 + addiw a1, t1, 1696 + bge a0, a1, label407 + sh2add a1, a0, s1 +.p2align 2 +label166: + addw t2, a5, a0 + addw t3, t0, a0 + mulw t4, t2, t3 + mv t1, t4 + bge t4, zero, label950 + addiw t1, t4, 1 +label950: + sraiw t2, t1, 1 + addiw a0, a0, 1 + flw f12, 0(a1) + addw t3, t0, t2 + lui t2, 24 + fcvt.s.w f10, t3 + addiw t1, t2, 1696 + fdiv.s f13, f12, f10 + fadd.s f11, f11, f13 + bge a0, t1, label423 + addi a1, a1, 4 + j label166 +.p2align 2 +label439: + fmv.w.x f12, zero + fmv.s f11, f10 + fmv.s f10, f12 + j label187 +.p2align 2 +label494: + lui t0, 24 + addiw a0, t0, 1696 + j label197 +.p2align 2 +label423: + lui a1, 24 + fmv.s f10, f11 + addiw a0, a1, 1696 + j label171 +.p2align 2 +label407: + lui a5, 24 + addiw a0, a5, 1696 + j label171 .p2align 2 cmmc_parallel_body_0: mv a3, a0 diff --git a/tests/SysY2022/performance/vector_mul3.riscv.s b/tests/SysY2022/performance/vector_mul3.riscv.s index 2715880df..5eb1381d9 100644 --- a/tests/SysY2022/performance/vector_mul3.riscv.s +++ b/tests/SysY2022/performance/vector_mul3.riscv.s @@ -25,29 +25,29 @@ main: addi sp, sp, -24 li a0, 62 sd ra, 0(sp) - sd s0, 8(sp) - sd s1, 16(sp) + sd s1, 8(sp) + sd s0, 16(sp) jal _sysy_starttime lui a3, 24 -pcrel1018: +pcrel1017: auipc a0, %pcrel_hi(cmmc_parallel_body_payload_0) -pcrel1019: +pcrel1018: auipc a1, %pcrel_hi(vectorA) - addi s0, a1, %pcrel_lo(pcrel1019) -pcrel1020: + addi s1, a1, %pcrel_lo(pcrel1018) +pcrel1019: auipc a1, %pcrel_hi(cmmc_parallel_body_0) - sd s0, %pcrel_lo(pcrel1018)(a0) - addi a2, a1, %pcrel_lo(pcrel1020) + sd s1, %pcrel_lo(pcrel1017)(a0) + addi a2, a1, %pcrel_lo(pcrel1019) mv a0, zero addiw a1, a3, 1696 jal cmmcParallelFor -pcrel1021: +pcrel1020: auipc a1, %pcrel_hi(Vectortm) mv a4, zero -pcrel1022: +pcrel1021: auipc a0, %pcrel_hi(vectorB) - addi a3, a1, %pcrel_lo(pcrel1021) - addi s1, a0, %pcrel_lo(pcrel1022) + addi a3, a1, %pcrel_lo(pcrel1020) + addi s0, a0, %pcrel_lo(pcrel1021) j label111 .p2align 2 label201: @@ -57,261 +57,113 @@ label201: .p2align 2 label111: fmv.w.x f10, zero - mv a2, a3 + mv a1, a3 mv t0, zero mv a0, zero + j label113 +.p2align 2 +label757: + lui t0, 24 + addiw a0, t0, 1696 +.p2align 2 +label118: + fsw f10, 0(a1) + lui t0, 24 + addiw a2, t0, 1696 + bge a5, a2, label121 + addi a1, a1, 4 + mv t0, a5 .p2align 2 label113: addiw a5, t0, 1 lui t1, 24 - addiw a1, t1, 1696 - bge a0, a1, label118 - addiw a1, a0, 3 + addiw a2, t1, 1696 + bge a0, a2, label118 + addiw a2, a0, 3 lui t2, 24 addiw t1, t2, 1696 - bge a1, t1, label752 - sh2add a1, a0, s0 + bge a2, t1, label752 + sh2add a2, a0, s1 addiw t1, t0, 2 addiw t2, t0, 3 addiw t3, t0, 4 j label234 .p2align 2 -label981: - addiw t5, t5, 1 +label121: + fmv.w.x f10, zero + mv a2, s0 + mv a5, zero + mv a0, zero + j label122 +.p2align 2 +label238: + addi a2, a2, 16 .p2align 2 +label234: + addw t6, t0, a0 + addw t4, a5, a0 + mulw a6, t6, t4 + mv t5, a6 + bge a6, zero, label982 + addiw t5, a6, 1 label982: sraiw t6, t5, 1 - flw f12, 0(a1) + flw f12, 0(a2) + addw t5, t1, a0 addw a6, a5, t6 - addw t6, t1, a0 + mulw t6, t4, t5 fcvt.s.w f11, a6 - mulw t5, t4, t6 + mv t4, t6 fdiv.s f13, f12, f11 - mv t4, t5 fadd.s f10, f10, f13 - bge t5, zero, label984 - addiw t4, t5, 1 + bge t6, zero, label984 + addiw t4, t6, 1 label984: sraiw a6, t4, 1 - flw f14, 4(a1) + flw f14, 4(a2) addw t4, t2, a0 - addw t5, a5, a6 - mulw a6, t6, t4 - fcvt.s.w f12, t5 - mv t5, a6 + addw t6, a5, a6 + fcvt.s.w f12, t6 + mulw t6, t5, t4 fdiv.s f13, f14, f12 + mv t5, t6 fadd.s f11, f10, f13 - bge a6, zero, label986 - addiw t5, a6, 1 + bge t6, zero, label986 + addiw t5, t6, 1 label986: sraiw t6, t5, 1 - flw f13, 8(a1) + flw f13, 8(a2) addw a6, a5, t6 + addw t6, t3, a0 fcvt.s.w f12, a6 - addw a6, t3, a0 + mulw t5, t4, t6 fdiv.s f14, f13, f12 - mulw t6, t4, a6 - mv t5, t6 + mv t4, t5 fadd.s f10, f11, f14 - bge t6, zero, label988 - addiw t5, t6, 1 + bge t5, zero, label988 + addiw t4, t5, 1 label988: - sraiw t4, t5, 1 + sraiw t5, t4, 1 addiw a0, a0, 4 - flw f13, 12(a1) + flw f12, 12(a2) + addw t6, a5, t5 lui t5, 24 - addw t6, a5, t4 + fcvt.s.w f11, t6 addiw t4, t5, 1693 - fcvt.s.w f12, t6 - fdiv.s f14, f13, f12 - fadd.s f11, f10, f14 - bge a0, t4, label820 - addi a1, a1, 16 - fmv.s f10, f11 -.p2align 2 -label234: - addw t6, t0, a0 - addw t4, a5, a0 - mulw a6, t6, t4 - mv t5, a6 - bge a6, zero, label982 - j label981 -label202: - li a0, 76 - jal _sysy_stoptime - mv a0, zero - fmv.w.x f10, zero - j label203 -.p2align 2 -label207: - addi s0, s0, 64 - fmv.s f10, f11 -.p2align 2 -label203: - sh2add a1, a0, s1 - flw f13, 0(s0) - lui a2, 24 - addiw a0, a0, 16 - flw f15, 0(a1) - flw f12, 4(s0) - fmadd.s f11, f13, f15, f10 - flw f14, 4(a1) - flw f13, 8(s0) - flw f15, 8(a1) - fmadd.s f10, f12, f14, f11 - flw f14, 12(s0) - flw f1, 12(a1) - flw f12, 16(s0) - flw f0, 16(a1) - fmadd.s f11, f13, f15, f10 - flw f13, 20(s0) - flw f15, 20(a1) - fmadd.s f10, f14, f1, f11 - flw f14, 24(s0) - fmadd.s f11, f12, f0, f10 - flw f0, 24(a1) - fmadd.s f10, f13, f15, f11 - flw f13, 28(s0) - flw f15, 28(a1) - fmadd.s f12, f14, f0, f10 - flw f14, 32(s0) - flw f0, 32(a1) - fmadd.s f11, f13, f15, f12 - flw f13, 36(s0) - flw f15, 36(a1) - flw f12, 40(s0) - flw f1, 40(a1) - fmadd.s f10, f14, f0, f11 - flw f14, 44(s0) - flw f0, 44(a1) - fmadd.s f11, f13, f15, f10 - flw f13, 48(s0) - flw f15, 48(a1) - fmadd.s f10, f12, f1, f11 - fmadd.s f11, f14, f0, f10 - flw f14, 52(s0) - flw f0, 52(a1) - fmadd.s f12, f13, f15, f11 - flw f11, 56(s0) - flw f15, 56(a1) - flw f13, 60(s0) - fmadd.s f10, f14, f0, f12 - flw f14, 60(a1) - addiw a1, a2, 1696 - fmadd.s f12, f11, f15, f10 - fmadd.s f11, f13, f14, f12 - blt a0, a1, label207 - fmv.w.x f10, zero - mv a0, zero - j label209 -.p2align 2 -label219: - addi s1, s1, 64 -.p2align 2 -label209: - flw f14, 0(s1) - addiw a0, a0, 16 - lui a2, 24 - flw f15, 4(s1) - fmadd.s f13, f14, f14, f10 - addiw a1, a2, 1696 - flw f14, 8(s1) - fmadd.s f12, f15, f15, f13 - flw f15, 12(s1) - flw f0, 16(s1) - fmadd.s f10, f14, f14, f12 - fmadd.s f13, f15, f15, f10 - flw f15, 20(s1) - fmadd.s f14, f0, f0, f13 - flw f13, 24(s1) - fmadd.s f12, f15, f15, f14 - flw f15, 28(s1) - flw f0, 32(s1) - fmadd.s f10, f13, f13, f12 - fmadd.s f14, f15, f15, f10 - flw f15, 36(s1) - fmadd.s f13, f0, f0, f14 - flw f14, 40(s1) - flw f0, 44(s1) - fmadd.s f12, f15, f15, f13 - flw f15, 48(s1) - fmadd.s f10, f14, f14, f12 - fmadd.s f13, f0, f0, f10 - flw f10, 52(s1) - fmadd.s f14, f15, f15, f13 - flw f15, 56(s1) - fmadd.s f12, f10, f10, f14 - flw f14, 60(s1) - fmadd.s f13, f15, f15, f12 - fmadd.s f10, f14, f14, f13 - blt a0, a1, label219 - fdiv.s f12, f11, f10 - lui a0, 260096 -pcrel1023: - auipc a2, %pcrel_hi(__cmmc_fp_constant_pool) - fmv.w.x f13, a0 - addi a0, a2, %pcrel_lo(pcrel1023) - flw f11, 0(a0) - fsub.s f10, f13, f12 - flw f13, 4(a0) - flt.s a1, f11, f10 - flt.s a2, f10, f13 - or a3, a1, a2 - beq a3, zero, label709 - lui a0, 260096 - fmv.s f11, f12 - fmv.w.x f10, a0 -.p2align 2 -label214: - fadd.s f14, f10, f11 - lui a0, 258048 -pcrel1024: - auipc a2, %pcrel_hi(__cmmc_fp_constant_pool) - fmv.w.x f15, a0 - addi a0, a2, %pcrel_lo(pcrel1024) - fmul.s f10, f14, f15 - flw f14, 0(a0) - flw f15, 4(a0) - fdiv.s f11, f12, f10 - fsub.s f13, f10, f11 - flt.s a2, f13, f15 - flt.s a1, f14, f13 - or a3, a1, a2 - bne a3, zero, label214 -label217: - lui a0, 260096 -pcrel1025: - auipc a3, %pcrel_hi(__cmmc_fp_constant_pool) - fmv.w.x f12, a0 - addi a1, a3, %pcrel_lo(pcrel1025) - fsub.s f11, f10, f12 - flw f12, 0(a1) - flw f10, 4(a1) - fle.s a2, f11, f12 - fle.s a3, f10, f11 - and a0, a2, a3 - jal putint - li a0, 10 - jal putch - ld ra, 0(sp) - mv a0, zero - ld s0, 8(sp) - ld s1, 16(sp) - addi sp, sp, 24 - ret -.p2align 2 -label752: - fmv.w.x f11, zero + fdiv.s f13, f12, f11 + fadd.s f10, f10, f13 + blt a0, t4, label238 + fmv.s f11, f10 .p2align 2 label223: lui t1, 24 - addiw a1, t1, 1696 - bge a0, a1, label757 - sh2add a1, a0, s0 + addiw a2, t1, 1696 + bge a0, a2, label757 + sh2add a2, a0, s1 j label228 .p2align 2 label232: - addi a1, a1, 4 + addi a2, a2, 4 .p2align 2 label228: addw t2, t0, a0 @@ -323,31 +175,26 @@ label228: label978: sraiw t2, t1, 1 addiw a0, a0, 1 - flw f13, 0(a1) + flw f13, 0(a2) addw t3, a5, t2 lui t2, 24 - fcvt.s.w f11, t3 + fcvt.s.w f10, t3 addiw t1, t2, 1696 - fdiv.s f12, f13, f11 - fadd.s f10, f10, f12 + fdiv.s f12, f13, f10 + fadd.s f11, f11, f12 blt a0, t1, label232 - lui a1, 24 - addiw a0, a1, 1696 + lui a2, 24 + fmv.s f10, f11 + addiw a0, a2, 1696 + j label118 .p2align 2 -label118: +label144: fsw f10, 0(a2) - lui t0, 24 - addiw a1, t0, 1696 - bge a5, a1, label121 + lui a5, 24 + addiw a1, a5, 1696 + bge t0, a1, label343 addi a2, a2, 4 - mv t0, a5 - j label113 -.p2align 2 -label121: - fmv.w.x f10, zero - mv a2, s1 - mv a5, zero - mv a0, zero + mv a5, t0 .p2align 2 label122: addiw t0, a5, 1 @@ -362,119 +209,48 @@ label122: addiw t1, a5, 2 addiw t2, a5, 3 addiw t3, a5, 4 - fmv.s f11, f10 j label139 .p2align 2 -label143: - addi a1, a1, 16 +label267: + fmv.w.x f12, zero + fmv.s f11, f10 + fmv.s f10, f12 .p2align 2 -label139: - addw t5, a5, a0 - addw t6, t0, a0 - mulw a6, t5, t6 - mv t4, a6 - bge a6, zero, label927 - addiw t4, a6, 1 -label927: - sraiw t5, t4, 1 - addiw a6, a0, 1 - flw f13, 0(a1) - addw t4, t1, a0 - addw a7, t5, a6 - mulw a6, t6, t4 - fcvt.s.w f12, a7 - mv t5, a6 - fdiv.s f14, f13, f12 - fadd.s f10, f11, f14 - bge a6, zero, label929 - addiw t5, a6, 1 -label929: - sraiw t6, t5, 1 - addiw a7, a0, 2 - flw f14, 4(a1) - addw t5, t2, a0 - addw a6, t6, a7 - mulw t6, t4, t5 - fcvt.s.w f12, a6 - mv t4, t6 - fdiv.s f13, f14, f12 - fadd.s f11, f10, f13 - bge t6, zero, label931 - addiw t4, t6, 1 -label931: - sraiw t6, t4, 1 - addiw a7, a0, 3 - flw f14, 8(a1) - addw a6, t6, a7 - addw t6, t3, a0 - fcvt.s.w f12, a6 - mulw a6, t5, t6 - fdiv.s f13, f14, f12 - mv t4, a6 - fadd.s f10, f11, f13 - bge a6, zero, label933 - addiw t4, a6, 1 -label933: - sraiw t5, t4, 1 - addiw a0, a0, 4 - flw f14, 12(a1) - addw t6, t5, a0 - lui t5, 24 - fcvt.s.w f12, t6 - addiw t4, t5, 1693 - fdiv.s f13, f14, f12 - fadd.s f11, f10, f13 - blt a0, t4, label143 - fmv.s f10, f11 -.p2align 2 -label128: - lui t1, 24 - addiw a1, t1, 1696 - bge a0, a1, label272 - sh2add a1, a0, a3 -.p2align 2 -label133: - addw t2, a5, a0 - addw t3, t0, a0 - mulw t4, t2, t3 - mv t1, t4 - bge t4, zero, label923 - addiw t1, t4, 1 -label923: - sraiw t2, t1, 1 - addiw a0, a0, 1 +label128: + lui t1, 24 + addiw a1, t1, 1696 + bge a0, a1, label272 + sh2add a1, a0, a3 +.p2align 2 +label133: + addw t2, a5, a0 + addw t3, t0, a0 + mulw t4, t2, t3 + mv t1, t4 + bge t4, zero, label923 + addiw t1, t4, 1 +label923: + sraiw t2, t1, 1 + addiw a0, a0, 1 flw f13, 0(a1) addw t3, t2, a0 lui t2, 24 - fcvt.s.w f11, t3 + fcvt.s.w f10, t3 addiw t1, t2, 1696 - fdiv.s f12, f13, f11 - fadd.s f10, f10, f12 + fdiv.s f12, f13, f10 + fadd.s f11, f11, f12 bge a0, t1, label288 addi a1, a1, 4 j label133 .p2align 2 -label288: - lui a1, 24 - addiw a0, a1, 1696 -.p2align 2 -label144: - fsw f10, 0(a2) - lui a5, 24 - addiw a1, a5, 1696 - bge t0, a1, label343 - addi a2, a2, 4 - mv a5, t0 - j label122 -.p2align 2 label343: fmv.w.x f10, zero mv a2, a3 - mv t0, zero + mv a5, zero mv a0, zero .p2align 2 label149: - addiw a5, t0, 1 + addiw t0, a5, 1 lui t1, 24 addiw a1, t1, 1696 bge a0, a1, label171 @@ -482,81 +258,83 @@ label149: lui t2, 24 addiw t1, t2, 1696 bge a1, t1, label355 - sh2add a1, a0, s1 - addiw t1, t0, 2 - addiw t2, t0, 3 - addiw t3, t0, 4 + sh2add a1, a0, s0 + addiw t1, a5, 2 + addiw t3, a5, 3 + addiw t2, a5, 4 .p2align 2 label156: - addw t6, t0, a0 - addw t5, a5, a0 - mulw a6, t6, t5 - mv t4, a6 + addw t6, a5, a0 + addw t4, t0, a0 + mulw a6, t6, t4 + mv t5, a6 bge a6, zero, label939 - addiw t4, a6, 1 + addiw t5, a6, 1 label939: - sraiw a6, t4, 1 + sraiw t6, t5, 1 flw f13, 0(a1) - addw t4, t1, a0 - addw t6, a5, a6 - fcvt.s.w f12, t6 - mulw t6, t5, t4 + addw t5, t1, a0 + addw a6, t0, t6 + mulw t6, t4, t5 + fcvt.s.w f12, a6 + mv t4, t6 fdiv.s f14, f13, f12 - mv t5, t6 fadd.s f11, f10, f14 bge t6, zero, label941 - addiw t5, t6, 1 + addiw t4, t6, 1 label941: - sraiw a6, t5, 1 - flw f14, 4(a1) - addw t5, t2, a0 - addw t6, a5, a6 + sraiw a6, t4, 1 + flw f13, 4(a1) + addw t4, t3, a0 + addw t6, t0, a6 fcvt.s.w f12, t6 - mulw t6, t4, t5 - fdiv.s f13, f14, f12 - mv t4, t6 - fadd.s f10, f11, f13 + mulw t6, t5, t4 + fdiv.s f14, f13, f12 + mv a6, t6 + fadd.s f10, f11, f14 bge t6, zero, label943 - addiw t4, t6, 1 + addiw a6, t6, 1 label943: - sraiw a6, t4, 1 + sraiw t6, a6, 1 flw f14, 8(a1) - addw t6, a5, a6 - addw a6, t3, a0 - fcvt.s.w f12, t6 - mulw t6, t5, a6 + addw a6, t2, a0 + addw t5, t0, t6 + mulw t6, t4, a6 + fcvt.s.w f12, t5 + mv t5, t6 fdiv.s f13, f14, f12 - mv t4, t6 fadd.s f11, f10, f13 bge t6, zero, label945 - addiw t4, t6, 1 + addiw t5, t6, 1 label945: - sraiw t5, t4, 1 + sraiw t4, t5, 1 addiw a0, a0, 4 - flw f12, 12(a1) - addw t6, a5, t5 + flw f13, 12(a1) lui t5, 24 - fcvt.s.w f10, t6 + addw t6, t0, t4 addiw t4, t5, 1693 - fdiv.s f13, f12, f10 - fadd.s f11, f11, f13 + fcvt.s.w f12, t6 + fdiv.s f14, f13, f12 + fadd.s f10, f11, f14 bge a0, t4, label402 addi a1, a1, 16 - fmv.s f10, f11 j label156 .p2align 2 label355: - fmv.w.x f11, zero + fmv.w.x f12, zero + fmv.s f11, f10 + fmv.s f10, f12 .p2align 2 label161: lui t1, 24 addiw a1, t1, 1696 bge a0, a1, label407 - sh2add a1, a0, s1 + sh2add a1, a0, s0 + fmv.s f10, f11 .p2align 2 label166: - addw t2, t0, a0 - addw t3, a5, a0 + addw t2, a5, a0 + addw t3, t0, a0 mulw t4, t2, t3 mv t1, t4 bge t4, zero, label950 @@ -564,51 +342,281 @@ label166: label950: sraiw t2, t1, 1 addiw a0, a0, 1 - flw f13, 0(a1) - addw t3, a5, t2 + flw f12, 0(a1) + addw t3, t0, t2 lui t2, 24 fcvt.s.w f11, t3 addiw t1, t2, 1696 - fdiv.s f12, f13, f11 - fadd.s f10, f10, f12 + fdiv.s f13, f12, f11 + fadd.s f10, f10, f13 bge a0, t1, label423 addi a1, a1, 4 j label166 .p2align 2 +label143: + addi a1, a1, 16 +.p2align 2 +label139: + addw t6, a5, a0 + addw t4, t0, a0 + mulw a6, t6, t4 + mv t5, a6 + bge a6, zero, label927 + addiw t5, a6, 1 +label927: + sraiw t6, t5, 1 + addiw a6, a0, 1 + flw f13, 0(a1) + addw t5, t1, a0 + addw a7, t6, a6 + mulw a6, t4, t5 + fcvt.s.w f12, a7 + mv t6, a6 + fdiv.s f14, f13, f12 + fadd.s f11, f10, f14 + bge a6, zero, label929 + addiw t6, a6, 1 +label929: + sraiw t4, t6, 1 + addiw a7, a0, 2 + flw f14, 4(a1) + addw a6, t4, a7 + addw t4, t2, a0 + fcvt.s.w f12, a6 + mulw t6, t5, t4 + fdiv.s f13, f14, f12 + mv t5, t6 + fadd.s f10, f11, f13 + bge t6, zero, label931 + addiw t5, t6, 1 +label931: + sraiw t6, t5, 1 + addiw a6, a0, 3 + flw f14, 8(a1) + addw t5, t3, a0 + addw a7, t6, a6 + mulw t6, t4, t5 + fcvt.s.w f12, a7 + mv t4, t6 + fdiv.s f13, f14, f12 + fadd.s f11, f10, f13 + bge t6, zero, label933 + addiw t4, t6, 1 +label933: + sraiw t5, t4, 1 + addiw a0, a0, 4 + flw f13, 12(a1) + addw t6, t5, a0 + lui t5, 24 + fcvt.s.w f12, t6 + addiw t4, t5, 1693 + fdiv.s f14, f13, f12 + fadd.s f10, f11, f14 + blt a0, t4, label143 + fmv.s f11, f10 + j label128 +.p2align 2 label423: lui a1, 24 addiw a0, a1, 1696 .p2align 2 label171: fsw f10, 0(a2) - lui t0, 24 - addiw a1, t0, 1696 - bge a5, a1, label174 + lui a5, 24 + addiw a1, a5, 1696 + bge t0, a1, label174 addi a2, a2, 4 - mv t0, a5 + mv a5, t0 j label149 +label202: + li a0, 76 + jal _sysy_stoptime + mv a1, zero + mv a0, s1 + fmv.w.x f10, zero + j label203 +.p2align 2 +label207: + addi a0, a0, 64 + fmv.s f10, f11 +.p2align 2 +label203: + sh2add a2, a1, s0 + flw f13, 0(a0) + lui a3, 24 + addiw a1, a1, 16 + flw f15, 0(a2) + flw f12, 4(a0) + fmadd.s f11, f13, f15, f10 + flw f14, 4(a2) + flw f13, 8(a0) + flw f15, 8(a2) + fmadd.s f10, f12, f14, f11 + flw f14, 12(a0) + flw f0, 12(a2) + flw f12, 16(a0) + fmadd.s f11, f13, f15, f10 + flw f13, 16(a2) + fmadd.s f10, f14, f0, f11 + flw f14, 20(a0) + flw f15, 20(a2) + fmadd.s f11, f12, f13, f10 + flw f13, 24(a0) + flw f0, 24(a2) + flw f12, 28(a0) + fmadd.s f10, f14, f15, f11 + flw f15, 28(a2) + flw f14, 32(a0) + fmadd.s f11, f13, f0, f10 + flw f0, 32(a2) + flw f13, 36(a0) + fmadd.s f10, f12, f15, f11 + flw f15, 36(a2) + fmadd.s f11, f14, f0, f10 + flw f14, 40(a0) + flw f0, 40(a2) + fmadd.s f12, f13, f15, f11 + flw f11, 44(a0) + flw f13, 44(a2) + fmadd.s f10, f14, f0, f12 + flw f14, 48(a0) + flw f15, 48(a2) + fmadd.s f12, f11, f13, f10 + flw f13, 52(a0) + flw f1, 52(a2) + fmadd.s f11, f14, f15, f12 + flw f14, 56(a0) + flw f0, 56(a2) + fmadd.s f10, f13, f1, f11 + flw f13, 60(a0) + flw f15, 60(a2) + addiw a2, a3, 1696 + fmadd.s f12, f14, f0, f10 + fmadd.s f11, f13, f15, f12 + blt a1, a2, label207 + fmv.w.x f10, zero + mv a0, zero +.p2align 2 +label209: + flw f14, 0(s0) + addiw a0, a0, 16 + lui a2, 24 + flw f15, 4(s0) + fmadd.s f13, f14, f14, f10 + addiw a1, a2, 1696 + flw f14, 8(s0) + fmadd.s f12, f15, f15, f13 + flw f15, 12(s0) + flw f0, 16(s0) + fmadd.s f10, f14, f14, f12 + fmadd.s f13, f15, f15, f10 + flw f15, 20(s0) + fmadd.s f14, f0, f0, f13 + flw f13, 24(s0) + fmadd.s f12, f15, f15, f14 + flw f15, 28(s0) + flw f0, 32(s0) + fmadd.s f10, f13, f13, f12 + fmadd.s f14, f15, f15, f10 + flw f15, 36(s0) + fmadd.s f13, f0, f0, f14 + flw f14, 40(s0) + flw f0, 44(s0) + fmadd.s f12, f15, f15, f13 + flw f15, 48(s0) + fmadd.s f10, f14, f14, f12 + fmadd.s f13, f0, f0, f10 + flw f10, 52(s0) + fmadd.s f14, f15, f15, f13 + flw f15, 56(s0) + fmadd.s f12, f10, f10, f14 + flw f14, 60(s0) + fmadd.s f13, f15, f15, f12 + fmadd.s f10, f14, f14, f13 + bge a0, a1, label213 + addi s0, s0, 64 + j label209 +label213: + fdiv.s f12, f11, f10 + lui a0, 260096 +pcrel1022: + auipc a2, %pcrel_hi(__cmmc_fp_constant_pool) + fmv.w.x f13, a0 + addi a0, a2, %pcrel_lo(pcrel1022) + fsub.s f10, f13, f12 + flw f13, 0(a0) + flw f11, 4(a0) + flt.s a1, f13, f10 + flt.s a2, f10, f11 + or a3, a1, a2 + beq a3, zero, label709 + lui a0, 260096 + fmv.s f11, f12 + fmv.w.x f10, a0 +.p2align 2 +label214: + fadd.s f14, f10, f11 + lui a0, 258048 +pcrel1023: + auipc a2, %pcrel_hi(__cmmc_fp_constant_pool) + fmv.w.x f15, a0 + addi a0, a2, %pcrel_lo(pcrel1023) + fmul.s f10, f14, f15 + flw f14, 0(a0) + flw f15, 4(a0) + fdiv.s f11, f12, f10 + fsub.s f13, f10, f11 + flt.s a2, f13, f15 + flt.s a1, f14, f13 + or a3, a1, a2 + bne a3, zero, label214 + j label217 +label709: + lui a0, 260096 + fmv.w.x f10, a0 +label217: + lui a0, 260096 +pcrel1024: + auipc a3, %pcrel_hi(__cmmc_fp_constant_pool) + fmv.w.x f12, a0 + addi a1, a3, %pcrel_lo(pcrel1024) + fsub.s f11, f10, f12 + flw f12, 0(a1) + flw f10, 4(a1) + fle.s a2, f11, f12 + fle.s a3, f10, f11 + and a0, a2, a3 + jal putint + li a0, 10 + jal putch + ld ra, 0(sp) + mv a0, zero + ld s1, 8(sp) + ld s0, 16(sp) + addi sp, sp, 24 + ret .p2align 2 label174: fmv.w.x f10, zero - mv a2, s0 - mv a5, zero + mv a2, s1 + mv t0, zero mv a0, zero j label175 .p2align 2 -label510: - lui a1, 24 - addiw a0, a1, 1696 +label494: + lui t0, 24 + addiw a0, t0, 1696 .p2align 2 label197: fsw f10, 0(a2) - lui a5, 24 - addiw a1, a5, 1696 - bge t0, a1, label201 + lui t0, 24 + addiw a1, t0, 1696 + bge a5, a1, label201 addi a2, a2, 4 - mv a5, t0 + mv t0, a5 .p2align 2 label175: - addiw t0, a5, 1 + addiw a5, t0, 1 lui t1, 24 addiw a1, t1, 1696 bge a0, a1, label197 @@ -617,83 +625,85 @@ label175: addiw t1, t2, 1696 bge a1, t1, label439 sh2add a1, a0, a3 - addiw t1, a5, 2 - addiw t2, a5, 3 - addiw t3, a5, 4 + addiw t1, t0, 2 + addiw t2, t0, 3 + addiw t3, t0, 4 .p2align 2 label182: - addw t6, a5, a0 - addw t4, t0, a0 - mulw a6, t6, t4 - mv t5, a6 + addw t6, t0, a0 + addw t5, a5, a0 + mulw a6, t6, t5 + mv t4, a6 bge a6, zero, label957 - addiw t5, a6, 1 + addiw t4, a6, 1 label957: - sraiw t6, t5, 1 - addiw a6, a0, 1 - flw f12, 0(a1) - addw t5, t1, a0 - addw a7, t6, a6 - mulw a6, t4, t5 - fcvt.s.w f11, a7 - mv t6, a6 - fdiv.s f13, f12, f11 - fadd.s f10, f10, f13 - bge a6, zero, label959 - addiw t6, a6, 1 + sraiw t6, t4, 1 + addiw a7, a0, 1 + flw f13, 0(a1) + addw t4, t1, a0 + addw a6, t6, a7 + mulw t6, t5, t4 + fcvt.s.w f11, a6 + mv t5, t6 + fdiv.s f12, f13, f11 + fadd.s f10, f10, f12 + bge t6, zero, label959 + addiw t5, t6, 1 label959: - sraiw t4, t6, 1 + sraiw t6, t5, 1 addiw a6, a0, 2 flw f13, 4(a1) - addw a7, t4, a6 - addw t4, t2, a0 + addw t5, t2, a0 + addw a7, t6, a6 + mulw a6, t4, t5 fcvt.s.w f12, a7 - mulw t6, t5, t4 + mv t6, a6 fdiv.s f14, f13, f12 - mv t5, t6 fadd.s f11, f10, f14 - bge t6, zero, label961 - addiw t5, t6, 1 + bge a6, zero, label961 + addiw t6, a6, 1 label961: - sraiw t6, t5, 1 + sraiw t4, t6, 1 addiw a6, a0, 3 flw f14, 8(a1) - addw t5, t3, a0 - addw a7, t6, a6 - mulw a6, t4, t5 + addw t6, t3, a0 + addw a7, t4, a6 + mulw a6, t5, t6 fcvt.s.w f12, a7 - mv t6, a6 + mv t4, a6 fdiv.s f13, f14, f12 fadd.s f10, f11, f13 bge a6, zero, label963 - addiw t6, a6, 1 + addiw t4, a6, 1 label963: - sraiw t4, t6, 1 + sraiw t5, t4, 1 addiw a0, a0, 4 - flw f14, 12(a1) - addw t5, t4, a0 - fcvt.s.w f12, t5 + flw f12, 12(a1) + addw t6, t5, a0 lui t5, 24 - fdiv.s f13, f14, f12 + fcvt.s.w f11, t6 addiw t4, t5, 1693 - fadd.s f11, f10, f13 + fdiv.s f13, f12, f11 + fadd.s f10, f10, f13 bge a0, t4, label489 addi a1, a1, 16 - fmv.s f10, f11 j label182 .p2align 2 -label489: - fmv.s f10, f11 +label439: + fmv.w.x f12, zero + fmv.s f11, f10 + fmv.s f10, f12 .p2align 2 label187: lui t1, 24 addiw a1, t1, 1696 bge a0, a1, label494 sh2add a1, a0, a3 + fmv.s f10, f11 .p2align 2 label192: - addw t2, a5, a0 - addw t3, t0, a0 + addw t2, t0, a0 + addw t3, a5, a0 mulw t4, t2, t3 mv t1, t4 bge t4, zero, label968 @@ -712,49 +722,40 @@ label968: addi a1, a1, 4 j label192 .p2align 2 -label757: - lui t0, 24 - fmv.s f10, f11 - addiw a0, t0, 1696 - j label118 +label402: + fmv.s f11, f10 + j label161 .p2align 2 -label820: - fmv.s f10, f11 - j label223 +label489: + fmv.s f11, f10 + j label187 .p2align 2 -label494: - lui a5, 24 - fmv.s f10, f11 - addiw a0, a5, 1696 +label510: + lui a1, 24 + addiw a0, a1, 1696 j label197 .p2align 2 -label439: - fmv.w.x f11, zero - j label187 -.p2align 2 label407: - lui t0, 24 - fmv.s f10, f11 - addiw a0, t0, 1696 + lui a5, 24 + addiw a0, a5, 1696 j label171 .p2align 2 -label272: - lui a5, 24 +label288: + lui a1, 24 fmv.s f10, f11 - addiw a0, a5, 1696 + addiw a0, a1, 1696 j label144 -label709: - lui a0, 260096 - fmv.w.x f10, a0 - j label217 .p2align 2 -label402: - fmv.s f10, f11 - j label161 +label752: + fmv.w.x f12, zero + fmv.s f11, f10 + fmv.s f10, f12 + j label223 .p2align 2 -label267: - fmv.w.x f11, zero - j label128 +label272: + lui a5, 24 + addiw a0, a5, 1696 + j label144 .p2align 2 cmmc_parallel_body_0: mv a3, a0