mirror of
https://github.com/Wan-Video/Wan2.1.git
synced 2025-07-14 03:30:10 +00:00
Fix performance regression caused by duplicated Flash Attention code
This commit is contained in:
parent
3c7c6f8b29
commit
2fc28056fa
@@ -304,13 +304,6 @@ def usp_attn_forward(self,
|
|||||||
value=half(v),
|
value=half(v),
|
||||||
window_size=self.window_size)
|
window_size=self.window_size)
|
||||||
|
|
||||||
x = xFuserLongContextAttention()(
|
|
||||||
None,
|
|
||||||
query=half(q),
|
|
||||||
key=half(k),
|
|
||||||
value=half(v),
|
|
||||||
window_size=self.window_size)
|
|
||||||
|
|
||||||
# TODO: padding after attention.
|
# TODO: padding after attention.
|
||||||
# x = torch.cat([x, x.new_zeros(b, s - x.size(1), n, d)], dim=1)
|
# x = torch.cat([x, x.new_zeros(b, s - x.size(1), n, d)], dim=1)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user