Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions enzyme/Enzyme/DiffeGradientUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,65 @@ SmallVector<SelectInst *, 4> DiffeGradientUtils::addToDiffe(
return res;
}

// Partial accumulation into a fixed-width vector. When the byte window
// [start, start + size) covers whole vector elements, build a copy of the
// incoming derivative in which only the covered lanes are kept (every other
// lane is zero) and forward that to addToDiffe. Anything this block does not
// handle falls through to the code that follows it.
if (auto VecT = dyn_cast<VectorType>(VT)) {
// Scalable vectors have no compile-time lane count, so they are not handled.
if (!VecT->getElementCount().isScalable()) {
Type *elemTy = VecT->getElementType();
// Element size in bytes, rounding the bit width up to a whole byte.
// NOTE(review): for element types whose bit width is not a multiple of 8
// this rounded size may not equal the in-memory lane stride -- confirm such
// types cannot reach this path.
auto elemBytes = (DL.getTypeSizeInBits(elemTy) + 7) / 8;

// Only handle element-aligned windows
// (both the window start and its length must be multiples of the element
// size; the elemBytes != 0 test guards the modulo/division below).
if (elemBytes != 0 && start % elemBytes == 0 && size % elemBytes == 0) {
// Lane range covered by the window: [left_idx, right_idx).
// NOTE(review): the quotients are narrowed to unsigned here; the declared
// types of start/size are not visible in this hunk -- confirm no truncation.
unsigned left_idx = start / elemBytes;
unsigned right_idx = (start + size) / elemBytes; // exclusive

// Clamp both bounds to the number of lanes so the loop in maskVec never
// indexes past the end of the vector.
// NOTE(review): if clamping leaves left_idx == right_idx the mask is all
// zero and a zero vector is still passed to addToDiffe -- confirm intended.
unsigned numElts = VecT->getElementCount().getFixedValue();
if (left_idx > numElts)
left_idx = numElts;
if (right_idx > numElts)
right_idx = numElts;

// Returns a value of type VT that starts as all-zero and has lanes
// [left_idx, right_idx) copied over from dsub, one extractelement /
// insertelement pair per lane, emitted through BuilderM.
auto maskVec = [&](Value *dsub) -> Value * {
Value *masked = Constant::getNullValue(VT);
for (unsigned i = left_idx; i < right_idx; i++) {
Value *vidx =
ConstantInt::get(Type::getInt32Ty(val->getContext()), i);
Value *el = BuilderM.CreateExtractElement(dsub, vidx);
masked = BuilderM.CreateInsertElement(masked, el, vidx);
}
return masked;
};

if (getWidth() == 1) {
// Width 1: the indices into dif are idxs with the first
// ignoreFirstSlicesOfDif entries dropped. Each remaining index is
// converted with cast<ConstantInt>, so it must be a constant integer.
SmallVector<unsigned, 1> eidxs;
for (auto idx : idxs.slice(ignoreFirstSlicesOfDif))
eidxs.push_back((unsigned)cast<ConstantInt>(idx)->getZExtValue());

// Pull the addressed sub-value out of dif, mask it to the window, and
// accumulate it using the original idxs.
Value *subdif = extractMeta(BuilderM, dif, eidxs);
return addToDiffe(val, maskVec(subdif), BuilderM, addingType, idxs,
mask);
} else {
// Width > 1: repeat the same steps once per j in [0, getWidth()),
// prefixing j to both index lists, and gather every SelectInst returned
// by the per-j addToDiffe calls.
SmallVector<SelectInst *, 4> res;
for (unsigned j = 0; j < getWidth(); j++) {
// lidxs: indices passed to addToDiffe (j followed by all of idxs).
// eidxs: indices into dif (j followed by the sliced, constant idxs).
SmallVector<Value *, 1> lidxs;
SmallVector<unsigned, 1> eidxs = {(unsigned)j};

lidxs.push_back(
ConstantInt::get(Type::getInt32Ty(val->getContext()), j));
for (auto idx : idxs.slice(ignoreFirstSlicesOfDif))
eidxs.push_back((unsigned)cast<ConstantInt>(idx)->getZExtValue());
for (auto idx : idxs)
lidxs.push_back(idx);

Value *subdif = extractMeta(BuilderM, dif, eidxs);
for (auto v : addToDiffe(val, maskVec(subdif), BuilderM, addingType,
lidxs, mask))
res.push_back(v);
}
return res;
}
}
}
}

llvm::errs() << " VT: " << *VT << " idxs:{";
for (auto idx : idxs)
llvm::errs() << *idx << ",";
Expand Down
39 changes: 39 additions & 0 deletions enzyme/test/Enzyme/ReverseMode/partial_vec_window.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
; RUN: %opt < %s %newLoadEnzyme -passes="enzyme,function(mem2reg,early-cse,sroa,instsimplify,%simplifycfg,adce)" -enzyme-preopt=false -S | FileCheck %s
; Regression test: partial-window accumulation into a fixed vector (<2 x float>).
; Previously asserted: "unhandled accumulate with partial sizes".

source_filename = "partial_vec_window"
target triple = "x86_64-pc-linux-gnu"

%ret2v = type { <2 x float>, <2 x float> }

; Builds a %ret2v aggregate from %x:
;   field 0 = <2 x float> with %x in lane 0 and 0.0 in lane 1
;   field 1 = <2 x float> zeroinitializer
define %ret2v @make(float %x) {
entry:
; Start from an all-zero vector so only lane 0 depends on %x.
%v0 = insertelement <2 x float> zeroinitializer, float %x, i32 0
%r0 = insertvalue %ret2v undef, <2 x float> %v0, 0
%r1 = insertvalue %ret2v %r0, <2 x float> zeroinitializer, 1
ret %ret2v %r1
}

; Function under differentiation. Calls @make, takes field 0 of the result
; (a <2 x float>), spills it to a stack slot, and returns only the first float
; read back through a float* view of that slot.
define float @tester(float %x) {
entry:
%call = call %ret2v @make(float %x)
%vec = extractvalue %ret2v %call, 0

; Force "partial" use: only the first 4 bytes of the <2 x float>
; The whole vector is stored, but the load below reads a single float through
; a bitcast of the same pointer.
%tmp = alloca <2 x float>, align 8
store <2 x float> %vec, <2 x float>* %tmp, align 8
%fp = bitcast <2 x float>* %tmp to float*
%a = load float, float* %fp, align 4

ret float %a
}

; Test entry point: passes @tester and %x to the variadic @__enzyme_autodiff
; declaration and returns the float it produces.
define float @test_derivative(float %x) {
entry:
%d = call float (float (float)*, ...) @__enzyme_autodiff(float (float)* @tester, float %x)
ret float %d
}

declare float @__enzyme_autodiff(float (float)*, ...)
; CHECK: @diffetester
Loading