Fooocus框架代码分析

news/2024/7/10 21:02:05 标签: stable diffusion, fooocus

由于Fooocus的优秀推理能力,后续考虑从webui切换到Fooocus上,因此对其中的代码要进行深入分析,Fooocus的sdxl版本在11g的显存上跑起来压力不大,但是webui的sdxl版本起码12g。尤其要对比其和webui的优化点,但是在代码层面,并不是和webui同一档次的框架,webui采用了支持hook式的插件系统,但是fooocus因为其midjourney的指向,所以并不是走三方插件的路子。

在autodl上:python launch.py --listen --port 6006

entry_with_update.py

# 启动
python entry_with_update.py --listen

launch.py ->
prepare_environment()->
ini_comfy_args()->
- args_manager.py -> args = comfy_cli.args ->backend/headless/comfy/cli_args.py
download_models()->

webui.py ->
- run_button.click().then(fn=generate_clicked)->
-- modules/async_worker.py->workers()->threading.Thread(target=worker).start()
-- handler()-> # 传入参数并配置
-- prompt_processing/vary/upscale/inpaint/controlnet/
-- imgs = pipeline.process_diffusin(...)-> 

modules/default_pipeline.py -> process_diffusion() 主pipeline,webui中是StableDiffusionProcessingTxt2Img和StableDiffusionProcessingImg2Img两个核心接口。

if latent is None:
    empty_latent = core.generate_empty_latent(width,height,1)
else:
    empty_latent = latent

sampled_latent = core.ksampler(final_unet,final_refiner,positive_cond,negative_cond,empty_latent,steps,denoise,callback,cfg_scale,sampler_name,scheduler_name,switch)    
decoded_latent = core.decode_vae(vae,sampled_latent,...)
images = core.pytorch_to_numpy(decoded_latent)  

默认方法:refresh_everything()

refresh_everything()->

refresh_refiner_model(refiner_model_name)
refresh_base_model(base_model_name)
refresh_loras(loras)

prepare_text_encoder(True)

core.py -> generate_empty_latent()

opEmptyLatentImage.generate(width,height,batch_size)[0]

- backend/headless/nodes.py -> EmptyLatentImage.generate()
- latent = torch.zeros([bs,4,height//8,width//8]) -> {'samples':latent}

core.py->ksampler()->backend/headless/comfy/sample.py

core->ksampler()

latent_image = latent['sampler']
noise = comfy.sample.prepare_noise(latent_image,seed,...)

samples = comfy.sample.sample(model,noise,steps,cfg,sampler_name,scheduler,positive,negative,latent_image,...)
- backend/headless/comfy/sample.py->sample()
- real_model,positive_copy,negative_copy,noise_mask,models=prepare_sampling(model,noise.shape,positive...)
- sampler = comfy.samplers.KSampler(...)
- sampler = sampler.sample(noise,positive_copy,negative_copy,cfg,latent_image...)
-- sampler = sampler_class(self.sampler)
-- sample(self.model,noise,positive,...)

sampler_class()->backend/headless/comfy/samplers.py

sampler_class(name)->
sampler = ksampler(name)->class KSAMPLER(Sampler)

sample->modules/sample_hijack.py (backend/headless/comfy/samplers.py) 劫持了comfy中的sample

sample_hijack(model,noise,positive,negative,cfg,device,sampler,sigmas,model_options,latent_image,denoise_mask,callback,...) ->

positive = positive[:]
negative = negative[:]

model_wrap = model_wrap(model)
- model_denoise = CFGNoisePredictor(model)
- model_wrap = k_diffusion_external.CompVisDenoiser(model_denoise)

calculate_start_end_timesteps(model_wrap,negative)
calculate_start_end_timesteps(model_wrap,positive)
for c in positive/negative:
    create_cond_with_same_area_if_none(negative/positive,c)
pre_run_control(model_wrap,negative+positive) # cfg相关

latent_image = model.process_latent_in(latent_image)

samples = samplers.sample(model_wrap,sigmas,extra_args,...)
model.process_latent_out(samples)

backend/headless/comfy/samplers.py

class KSAMPLER->sample(model_wrap,sigmas,extra_args,callback,...)
model_k = KSamplerOInpaint(model_wrap)

if sampler_name == "dpm_fast":
    samples = k_diffusion_sampling.sample_dpm_fast(model_k,noise,...)
elif sampler_name == "dpm_adaptive":
    samples = k_diffusion_sampling.sample_dpm_adaptive(model_k,noise,...)
else:
    samples = getattr(k_diffusion_sampling,"sample_{}".format(sampler_name))(model_k,noise,...)

backend/headless/comfy/k_diffusion/sampling.py

sampler_dpmpp_2m_sde_gpu(model,x,sigmas,extra_args,callback,...)
noise_sampler = BrownianTreeNoiseSampler(x,sigma_min,..) if noise_sampler is None else noise_sampler
sample_dpmpp_2m_sde(model,x,...)

sample_dpmpp_2m_sde

for i in trange(len(sigmas)-1):
    denoised = model(x,sigmas[i]*s_in,**extra_args)
    if callback is not None:
        callback({'x':x,'i':i,'sigma':sigma[i],'sigma_hat':sigmas[i],'denoised':denoised})
    if sigmas[i+1] == 0:
        x = denoised
    else:
        # DPM-Solver++(2M) SDE
        t, s = -sigmas[i].log(), -sigmas[i + 1].log()
        h = s - t
        eta_h = eta * h
        x = sigmas[i + 1] / sigmas[i] * (-eta_h).exp() * x + (-h - eta_h).expm1().neg() * denoised

        if old_denoised is not None:
            r = h_last / h
            if solver_type == 'heun':
                x = x + ((-h - eta_h).expm1().neg() / (-h - eta_h) + 1) * (1 / r) * (denoised - old_denoised)
            elif solver_type == 'midpoint':
                x = x + 0.5 * (-h - eta_h).expm1().neg() * (1 / r) * (denoised - old_denoised)
         if eta:
            x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise

backend/headless/nodes.py 节点就是类

NODE_CLASS_MAPPINGS = {
    "KSampler": KSampler,
    "CheckpointLoaderSimple": CheckpointLoaderSimple,
    "CLIPTextEncode": CLIPTextEncode,
    "CLIPSetLastLayer": CLIPSetLastLayer,
    "VAEDecode": VAEDecode,
    "VAEEncode": VAEEncode,
    "VAEEncodeForInpaint": VAEEncodeForInpaint,
    "VAELoader": VAELoader,
    "EmptyLatentImage": EmptyLatentImage,
    "LatentUpscale": LatentUpscale,
    "LatentUpscaleBy": LatentUpscaleBy,
    "LatentFromBatch": LatentFromBatch,
    "RepeatLatentBatch": RepeatLatentBatch,
    "SaveImage": SaveImage,
    "PreviewImage": PreviewImage,
    "LoadImage": LoadImage,
    "LoadImageMask": LoadImageMask,
    "ImageScale": ImageScale,
    "ImageScaleBy": ImageScaleBy,
    "ImageInvert": ImageInvert,
    "ImageBatch": ImageBatch,
    "ImagePadForOutpaint": ImagePadForOutpaint,
    "EmptyImage": EmptyImage,
    "ConditioningAverage": ConditioningAverage,
    "ConditioningCombine": ConditioningCombine,
    "ConditioningConcat": ConditioningConcat,
    "ConditioningSetArea": ConditioningSetArea,
    "ConditioningSetAreaPercentage": ConditioningSetAreaPercentage,
    "ConditioningSetMask": ConditioningSetMask,
    "KSamplerAdvanced": KSamplerAdvanced,
    "SetLatentNoiseMask": SetLatentNoiseMask,
    "LatentComposite": LatentComposite,
    "LatentBlend": LatentBlend,
    "LatentRotate": LatentRotate,
    "LatentFlip": LatentFlip,
    "LatentCrop": LatentCrop,
    "LoraLoader": LoraLoader,
    "CLIPLoader": CLIPLoader,
    "UNETLoader": UNETLoader,
    "DualCLIPLoader": DualCLIPLoader,
    "CLIPVisionEncode": CLIPVisionEncode,
    "StyleModelApply": StyleModelApply,
    "unCLIPConditioning": unCLIPConditioning,
    "ControlNetApply": ControlNetApply,
    "ControlNetApplyAdvanced": ControlNetApplyAdvanced,
    "ControlNetLoader": ControlNetLoader,
    "DiffControlNetLoader": DiffControlNetLoader,
    "StyleModelLoader": StyleModelLoader,
    "CLIPVisionLoader": CLIPVisionLoader,
    "VAEDecodeTiled": VAEDecodeTiled,
    "VAEEncodeTiled": VAEEncodeTiled,
    "unCLIPCheckpointLoader": unCLIPCheckpointLoader,
    "GLIGENLoader": GLIGENLoader,
    "GLIGENTextBoxApply": GLIGENTextBoxApply,

    "CheckpointLoader": CheckpointLoader,
    "DiffusersLoader": DiffusersLoader,

    "LoadLatent": LoadLatent,
    "SaveLatent": SaveLatent,

    "ConditioningZeroOut": ConditioningZeroOut,
    "ConditioningSetTimestepRange": ConditioningSetTimestepRange,
}


http://www.niftyadmin.cn/n/5083300.html

相关文章

unity面试八股文 - 基础篇

委托是什么? event 关键字有什么用? 委托: 委托是一种特殊类型的对象,它包含了对一个方法的引用。简单来说,委托就像是一个安全的函数指针。它允许我们创建可在运行时动态更改其引用方法的变量,并且可以指向类中定义…

关于element-ui中,页面上有多个el-table并通过v-if、v-else等控制是否显示时,type=selection勾选框失效或不显示的问题

刚开始是勾选框那一列直接空了什么都不显示,搜索了一下说是给el-table标签增加id,加了之后是显示了,但是点击任何选框都会直接取消全部选中效果,翻了半天源码也没发现到底是哪里事件冲突了还是怎么回事,烦了&#xff0…

【Python】Python 获取天气数据

Python 获取天气数据 检查天气似乎相当简单:打开 Web 浏览器,点击地址栏, 输入天气网站的 URL(或搜索一个,然后点击链接), 等待页面加载,跳过所有的广告等。 其实,如果有一个程序,…

风光十几年的MIUI要无了,小米自研MIOS即将就位

今年8月小米新品发布会上,K60 至尊版亮相的同时带来了下一代系统(或者说UI )的名字:MIUI 15 。 好家伙不知不觉已经要更到 15 了,大家用过几个版本呢? 回想起 MIUI V1 内测发布、MIUI V5 全民刷机、MIUI 8…

基于深度学习的“语义通信编解码技术”框架分类

目录 基于神经网络的语义提取基于神经网络的语义信源编码基于神经网络的语义信源信道联合编码基于神经网络的语义编码与数字调制联合设计参考文献 基于神经网络的语义提取 在现有的信源编码前端加上一个语义提取神经网络[53] ,如图所示。语义提取神经网络的输入是原…

【每日一题】找出数组的串联值

文章目录 Tag题目来源题目解读解题思路方法一:双指针 写在最后 Tag 【模拟】【双指针】【数组】【2023-10-12】 题目来源 2562. 找出数组的串联值 题目解读 串联值指的是将两个数字串联起来形成新的数字。现在要一次计算一个数组的地也给数和最后一个数的串联值&…

Spring ApplicationListener监听器用法

ApplicationListener ApplicationListener是Spring框架中的一个接口,用于监听Spring应用程序中的事件。当应用程序中发生事件时,ApplicationListener会自动触发相应的回调方法,从而实现对事件的处理。 在Spring Boot中,常见的事件…

基于Qt C++的工具箱项目源码,含命令行工具、桌面宠物、文献翻译、文件处理工具、医学图像浏览器、插件市场、设置扩展等工具

一、介绍 1. 基本信息 完整代码下载地址:基于Qt C的工具箱项目源码 TBox是一款基于Qt C的工具箱。用户可以自行选择安装所需的工具(以插件的形式),将TBox打造成专属于自己的效率软件。TBox基本界面展示如下: 2. 使用…