diff --git a/docs/.buildinfo b/docs/.buildinfo
index e79de496..e20fee0d 100644
--- a/docs/.buildinfo
+++ b/docs/.buildinfo
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 7192bb01cb11925cfee048363011ddbc
+config: 46d551dfdf92abdfae075ddecdb73176
tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/docs/.nojekyll b/docs/.nojekyll
deleted file mode 100644
index e69de29b..00000000
diff --git a/docs/_images/res_clusterings.png b/docs/_images/res_clusterings.png
new file mode 100644
index 00000000..94c1deb3
Binary files /dev/null and b/docs/_images/res_clusterings.png differ
diff --git a/docs/_images/res_constraint.png b/docs/_images/res_constraint.png
new file mode 100644
index 00000000..bc504ad7
Binary files /dev/null and b/docs/_images/res_constraint.png differ
diff --git a/docs/_modules/easygraph/classes/base.html b/docs/_modules/easygraph/classes/base.html
deleted file mode 100644
index 86495894..00000000
--- a/docs/_modules/easygraph/classes/base.html
+++ /dev/null
@@ -1,1015 +0,0 @@
-
-
-
[docs]defload_structure(file_path:Union[str,Path]):
-r"""Load a DHG's structure from a file. The supported structure includes: ``Graph``, ``DiGraph``, ``BiGraph``, ``Hypergraph``.
-
- Args:
- ``file_path`` (``Union[str, Path]``): The file path to load the DHG's structure.
- """
- importpickleaspkl
-
- importeasygraph
-
- file_path=Path(file_path)
- assertfile_path.exists(),f"{file_path} does not exist"
- withopen(file_path,"rb")asf:
- data=pkl.load(f)
- class_name,state_dict=data["class"],data["state_dict"]
- structure_class=getattr(easygraph,class_name)
- structure=structure_class.from_state_dict(state_dict)
- returnstructure
-
-
-
[docs]classBaseHypergraph:
-r"""The ``BaseHypergraph`` class is the base class for all hypergraph structures.
-
- Args:
- ``num_v`` (``int``): The number of vertices in the hypergraph.
- ``e_list_v2e`` (``Union[List[int], List[List[int]]]``, optional): A list of hyperedges describes how the vertices point to the hyperedges. Defaults to ``None``.
- ``e_list_e2v`` (``Union[List[int], List[List[int]]]``, optional): A list of hyperedges describes how the hyperedges point to the vertices. Defaults to ``None``.
- ``w_list_v2e`` (``Union[List[float], List[List[float]]]``, optional): The weights are attached to the connections from vertices to hyperedges, which has the same shape
- as ``e_list_v2e``. If set to ``None``, the value ``1`` is used for all connections. Defaults to ``None``.
- ``w_list_e2v`` (``Union[List[float], List[List[float]]]``, optional): The weights are attached to the connections from the hyperedges to the vertices, which has the
- same shape to ``e_list_e2v``. If set to ``None``, the value ``1`` is used for all connections. Defaults to ``None``.
- ``e_weight`` (``Union[float, List[float]]``, optional): A list of weights for hyperedges. If set to ``None``, the value ``1`` is used for all hyperedges. Defaults to ``None``.
- ``v_weight`` (``Union[float, List[float]]``, optional): Weights for vertices. If set to ``None``, the value ``1`` is used for all vertices. Defaults to ``None``.
- ``device`` (``torch.device``, optional): The device to store the hypergraph. Defaults to ``torch.device('cpu')``.
- """
-
- def__init__(
- self,
- num_v:int,
- e_list_v2e:Optional[Union[List[int],List[List[int]]]]=None,
- e_list_e2v:Optional[Union[List[int],List[List[int]]]]=None,
- w_list_v2e:Optional[Union[List[float],List[List[float]]]]=None,
- w_list_e2v:Optional[Union[List[float],List[List[float]]]]=None,
- e_weight:Optional[Union[float,List[float]]]=None,
- v_weight:Optional[List[float]]=None,
- device:torch.device=torch.device("cpu"),
- ):
- assert(
- isinstance(num_v,int)andnum_v>0
- ),"num_v should be a positive integer"
- self.clear()
- self._num_v=num_v
- self.device=device
-
- @abc.abstractmethod
- def__repr__(self)->str:
-r"""Print the hypergraph information."""
-
- @property
- @abc.abstractmethod
- defstate_dict(self)->Dict[str,Any]:
-r"""Get the state dict of the hypergraph."""
-
-
[docs]@abc.abstractmethod
- defsave(self,file_path:Union[str,Path]):
-r"""Save the DHG's hypergraph structure to a file.
-
- Args:
- ``file_path`` (``str``): The file_path to store the DHG's hypergraph structure.
- """
-
-
[docs]@staticmethod
- @abc.abstractmethod
- defload(file_path:Union[str,Path]):
-r"""Load the DHG's hypergraph structure from a file.
-
- Args:
- ``file_path`` (``str``): The file path to load the DHG's hypergraph structure.
- """
-
-
[docs]@staticmethod
- @abc.abstractmethod
- deffrom_state_dict(state_dict:dict):
-r"""Load the DHG's hypergraph structure from the state dict.
-
- Args:
- ``state_dict`` (``dict``): The state dict to load the DHG's hypergraph.
- """
-
-
[docs]@abc.abstractmethod
- defdraw(self,**kwargs):
-r"""Draw the structure."""
-
-
[docs]defclear(self):
-r"""Remove all hyperedges and caches from the hypergraph."""
- self._clear_raw()
- self._clear_cache()
[docs]@abc.abstractmethod
- defclone(self)->"BaseHypergraph":
-r"""Return a copy of this type of hypergraph."""
-
-
[docs]defto(self,device:torch.device):
-r"""Move the hypergraph to the specified device.
-
- Args:
- ``device`` (``torch.device``): The device to store the hypergraph.
- """
- self.device=device
- forvinself.vars_for_DL:
- ifvinself.cacheandself.cache[v]isnotNone:
- self.cache[v]=self.cache[v].to(device)
- fornameinself.group_names:
- if(
- vinself.group_cache[name]
- andself.group_cache[name][v]isnotNone
- ):
- self.group_cache[name][v]=self.group_cache[name][v].to(device)
- returnself
-
- # utils
- def_hyperedge_code(self,src_v_set:List[int],dst_v_set:List[int])->Tuple:
-r"""Generate the hyperedge code.
-
- Args:
- ``src_v_set`` (``List[int]``): The source vertex set.
- ``dst_v_set`` (``List[int]``): The destination vertex set.
- """
- returntuple([src_v_set,dst_v_set])
-
- def_merge_hyperedges(self,e1:dict,e2:dict,op:str="mean"):
- assertopin[
- "mean",
- "sum",
- "max",
- ],"Hyperedge merge operation must be one of ['mean', 'sum', 'max']"
- _func={
- "mean":lambdax,y:(x+y)/2,
- "sum":lambdax,y:x+y,
- "max":lambdax,y:max(x,y),
- }
- _e={}
- if"w_v2e"ine1and"w_v2e"ine2:
- for_idxinrange(len(e1["w_v2e"])):
- _e["w_v2e"]=_func[op](e1["w_v2e"][_idx],e2["w_v2e"][_idx])
- if"w_e2v"ine1and"w_e2v"ine2:
- for_idxinrange(len(e1["w_e2v"])):
- _e["w_e2v"]=_func[op](e1["w_e2v"][_idx],e2["w_e2v"][_idx])
- _e["w_e"]=_func[op](e1["w_e"],e2["w_e"])
- return_e
-
- @staticmethod
- def_format_e_list(e_list:Union[List[int],List[List[int]]])->List[List[int]]:
-r"""Format the hyperedge list.
-
- Args:
- ``e_list`` (``List[int]`` or ``List[List[int]]``): The hyperedge list.
- """
- iftype(e_list[0])in(int,float):
- return[tuple(sorted(e_list))]
- eliftype(e_list)==tuple:
- e_list=list(e_list)
- eliftype(e_list)==list:
- pass
- else:
- raiseTypeError("e_list must be List[int] or List[List[int]].")
- for_idxinrange(len(e_list)):
- e_list[_idx]=tuple(sorted(e_list[_idx]))
- returne_list
-
- @staticmethod
- def_format_e_list_and_w_on_them(
- e_list:Union[List[int],List[List[int]]],
- w_list:Optional[Union[List[int],List[List[int]]]]=None,
- ):
-r"""Format ``e_list`` and ``w_list``.
-
- Args:
- ``e_list`` (Union[List[int], List[List[int]]]): Hyperedge list.
- ``w_list`` (Optional[Union[List[int], List[List[int]]]]): Weights on connections. Defaults to ``None``.
- """
- bad_connection_msg=(
- "The weight on connections between vertices and hyperedges must have the"
- " same size as the hyperedges."
- )
- ifisinstance(e_list,tuple):
- e_list=list(e_list)
- ifw_listisnotNoneandisinstance(w_list,tuple):
- w_list=list(w_list)
- ifisinstance(e_list[0],int)andw_listisNone:
- w_list=[1]*len(e_list)
- e_list,w_list=[e_list],[w_list]
- elifisinstance(e_list[0],int)andw_listisnotNone:
- assertlen(e_list)==len(w_list),bad_connection_msg
- e_list,w_list=[e_list],[w_list]
- elifisinstance(e_list[0],list)andw_listisNone:
- w_list=[[1]*len(e)foreine_list]
- assertlen(e_list)==len(w_list),bad_connection_msg
- # TODO: this step can be speeded up
- foridxinrange(len(e_list)):
- assertlen(e_list[idx])==len(w_list[idx]),bad_connection_msg
- cur_e,cur_w=np.array(e_list[idx]),np.array(w_list[idx])
- sorted_idx=np.argsort(cur_e)
- e_list[idx]=tuple(cur_e[sorted_idx].tolist())
- w_list[idx]=cur_w[sorted_idx].tolist()
- returne_list,w_list
-
- def_fetch_H_of_group(self,direction:str,group_name:str):
-r"""Fetch the H matrix of the specified hyperedge group with ``torch.sparse_coo_tensor`` format.
-
- Args:
- ``direction`` (``str``): The direction of hyperedges can be either ``'v2e'`` or ``'e2v'``.
- ``group_name`` (``str``): The name of the group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- assertdirectionin["v2e","e2v"],"direction must be one of ['v2e', 'e2v']"
- ifdirection=="v2e":
- select_idx=0
- else:
- select_idx=1
- num_e=len(self._raw_groups[group_name])
- e_idx,v_idx=[],[]
- for_e_idx,einenumerate(self._raw_groups[group_name].keys()):
- sub_e=e[select_idx]
- v_idx.extend(sub_e)
- e_idx.extend([_e_idx]*len(sub_e))
- H=torch.sparse_coo_tensor(
- torch.tensor([v_idx,e_idx],dtype=torch.long),
- torch.ones(len(v_idx)),
- torch.Size([self.num_v,num_e]),
- device=self.device,
- ).coalesce()
- returnH
-
- def_fetch_R_of_group(self,direction:str,group_name:str):
-r"""Fetch the R matrix of the specified hyperedge group with ``torch.sparse_coo_tensor`` format.
-
- Args:
- ``direction`` (``str``): The direction of hyperedges can be either ``'v2e'`` or ``'e2v'``.
- ``group_name`` (``str``): The name of the group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- assertdirectionin["v2e","e2v"],"direction must be one of ['v2e', 'e2v']"
- ifdirection=="v2e":
- select_idx=0
- else:
- select_idx=1
- num_e=len(self._raw_groups[group_name])
- e_idx,v_idx,w_list=[],[],[]
- for_e_idx,einenumerate(self._raw_groups[group_name].keys()):
- sub_e=e[select_idx]
- v_idx.extend(sub_e)
- e_idx.extend([_e_idx]*len(sub_e))
- w_list.extend(self._raw_groups[group_name][e][f"w_{direction}"])
- R=torch.sparse_coo_tensor(
- torch.vstack([v_idx,e_idx]),
- torch.tensor(w_list),
- torch.Size([self.num_v,num_e]),
- device=self.device,
- ).coalesce()
- returnR
-
- def_fetch_W_of_group(self,group_name:str):
-r"""Fetch the W matrix of the specified hyperedge group with ``torch.sparse_coo_tensor`` format.
-
- Args:
- ``group_name`` (``str``): The name of the group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- w_list=[content["w_e"]forcontentinself._raw_groups[group_name].values()]
- W=torch.tensor(w_list,device=self.device).view((-1,1))
- returnW
-
- # some structure modification functions
-
[docs]defadd_hyperedges(
- self,
- e_list_v2e:Union[List[int],List[List[int]]],
- e_list_e2v:Union[List[int],List[List[int]]],
- w_list_v2e:Optional[Union[List[float],List[List[float]]]]=None,
- w_list_e2v:Optional[Union[List[float],List[List[float]]]]=None,
- e_weight:Optional[Union[float,List[float]]]=None,
- merge_op:str="mean",
- group_name:str="main",
- ):
-r"""Add hyperedges to the hypergraph. If the ``group_name`` is not specified, the hyperedges will be added to the default ``main`` hyperedge group.
-
- Args:
- ``num_v`` (``int``): The number of vertices in the hypergraph.
- ``e_list_v2e`` (``Union[List[int], List[List[int]]]``): A list of hyperedges describes how the vertices point to the hyperedges.
- ``e_list_e2v`` (``Union[List[int], List[List[int]]]``): A list of hyperedges describes how the hyperedges point to the vertices.
- ``w_list_v2e`` (``Union[List[float], List[List[float]]]``, optional): The weights are attached to the connections from vertices to hyperedges, which has the same shape
- as ``e_list_v2e``. If set to ``None``, the value ``1`` is used for all connections. Defaults to ``None``.
- ``w_list_e2v`` (``Union[List[float], List[List[float]]]``, optional): The weights are attached to the connections from the hyperedges to the vertices, which has the
- same shape to ``e_list_e2v``. If set to ``None``, the value ``1`` is used for all connections. Defaults to ``None``.
- ``e_weight`` (``Union[float, List[float]]``, optional): A list of weights for hyperedges. If set to ``None``, the value ``1`` is used for all hyperedges. Defaults to ``None``.
- ``merge_op`` (``str``): The merge operation for the conflicting hyperedges. The possible values are ``mean``, ``sum``, ``max``, and ``min``. Defaults to ``mean``.
- ``group_name`` (``str``, optional): The target hyperedge group to add these hyperedges. Defaults to the ``main`` hyperedge group.
- """
- e_list_v2e,w_list_v2e=self._format_e_list_and_w_on_them(
- e_list_v2e,w_list_v2e
- )
- e_list_e2v,w_list_e2v=self._format_e_list_and_w_on_them(
- e_list_e2v,w_list_e2v
- )
- ife_weightisNone:
- e_weight=[1.0]*len(e_list_v2e)
- assertlen(e_list_v2e)==len(
- e_weight
- ),"The number of hyperedges and the number of weights are not equal."
- assertlen(e_list_v2e)==len(
- e_list_e2v
- ),"Hyperedges of 'v2e' and 'e2v' must have the same size."
- for_idxinrange(len(e_list_v2e)):
- self._add_hyperedge(
- self._hyperedge_code(e_list_v2e[_idx],e_list_e2v[_idx]),
- {
- "w_v2e":w_list_v2e[_idx],
- "w_e2v":w_list_e2v[_idx],
- "w_e":e_weight[_idx],
- },
- merge_op,
- group_name,
- )
- self._clear_cache(group_name)
-
- def_add_hyperedge(
- self,
- hyperedge_code:Tuple[List[int],List[int]],
- content:Dict[str,Any],
- merge_op:str,
- group_name:str,
- ):
-r"""Add a hyperedge to the specified hyperedge group.
-
- Args:
- ``hyperedge_code`` (``Tuple[List[int], List[int]]``): The hyperedge code.
- ``content`` (``Dict[str, Any]``): The content of the hyperedge.
- ``merge_op`` (``str``): The merge operation for the conflicting hyperedges.
- ``group_name`` (``str``): The target hyperedge group to add this hyperedge.
- """
- ifgroup_namenotinself.group_names:
- self._raw_groups[group_name]={}
- self._raw_groups[group_name][hyperedge_code]=content
- else:
- ifhyperedge_codenotinself._raw_groups[group_name]:
- self._raw_groups[group_name][hyperedge_code]=content
- else:
- self._raw_groups[group_name][hyperedge_code]=self._merge_hyperedges(
- self._raw_groups[group_name][hyperedge_code],content,merge_op
- )
-
-
[docs]defremove_hyperedges(
- self,
- e_list_v2e:Union[List[int],List[List[int]]],
- e_list_e2v:Union[List[int],List[List[int]]],
- group_name:Optional[str]=None,
- ):
-r"""Remove the specified hyperedges from the hypergraph.
-
- Args:
- ``e_list_v2e`` (``Union[List[int], List[List[int]]]``): A list of hyperedges describes how the vertices point to the hyperedges.
- ``e_list_e2v`` (``Union[List[int], List[List[int]]]``): A list of hyperedges describes how the hyperedges point to the vertices.
- ``group_name`` (``str``, optional): Remove these hyperedges from the specified hyperedge group. If not specified, the function will
- remove those hyperedges from all hyperedge groups. Defaults to the ``None``.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- assertlen(e_list_v2e)==len(
- e_list_e2v
- ),"Hyperedges of 'v2e' and 'e2v' must have the same size."
- e_list_v2e=self._format_e_list(e_list_v2e)
- e_list_e2v=self._format_e_list(e_list_e2v)
- ifgroup_nameisNone:
- for_idxinrange(len(e_list_v2e)):
- e_code=self._hyperedge_code(e_list_v2e[_idx],e_list_e2v[_idx])
- fornameinself.group_names:
- self._raw_groups[name].pop(e_code,None)
- else:
- for_idxinrange(len(e_list_v2e)):
- e_code=self._hyperedge_code(e_list_v2e[_idx],e_list_e2v[_idx])
- self._raw_groups[group_name].pop(e_code,None)
- self._clear_cache(group_name)
-
-
[docs]@abc.abstractmethod
- defdrop_hyperedges(self,drop_rate:float,ord="uniform"):
-r"""Randomly drop hyperedges from the hypergraph. This function will return a new hypergraph with non-dropped hyperedges.
-
- Args:
- ``drop_rate`` (``float``): The drop rate of hyperedges.
- ``ord`` (``str``): The order of dropping edges. Currently, only ``'uniform'`` is supported. Defaults to ``uniform``.
- """
-
-
[docs]@abc.abstractmethod
- defdrop_hyperedges_of_group(
- self,group_name:str,drop_rate:float,ord="uniform"
- ):
-r"""Randomly drop hyperedges from the specified hyperedge group. This function will return a new hypergraph with non-dropped hyperedges.
-
- Args:
- ``group_name`` (``str``): The name of the hyperedge group.
- ``drop_rate`` (``float``): The drop rate of hyperedges.
- ``ord`` (``str``): The order of dropping edges. Currently, only ``'uniform'`` is supported. Defaults to ``uniform``.
- """
-
- # properties for the hypergraph
- @property
- defv(self)->List[int]:
-r"""Return the list of vertices."""
- ifself.cache.get("v")isNone:
- self.cache["v"]=list(range(self.num_v))
- returnself.cache["v"]
-
- @property
- defv_weight(self)->List[float]:
-r"""Return the vertex weights of the hypergraph."""
- ifself._v_weightisNone:
- self._v_weight=[1.0]*self.num_v
- returnself._v_weight
-
- @v_weight.setter
- defv_weight(self,v_weight:List[float]):
-r"""Set the vertex weights of the hypergraph."""
- assert(
- len(v_weight)==self.num_v
- ),"The length of vertex weights must be equal to the number of vertices."
- self._v_weight=v_weight
- self._clear_cache()
-
- @property
- @abc.abstractmethod
- defe(self)->Tuple[List[List[int]],List[float]]:
-r"""Return all hyperedges and weights in the hypergraph."""
-
-
[docs]@abc.abstractmethod
- defe_of_group(self,group_name:str)->Tuple[List[List[int]],List[float]]:
-r"""Return all hyperedges and weights in the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
-
- @property
- defnum_v(self)->int:
-r"""Return the number of vertices in the hypergraph."""
- returnself._num_v
-
- @property
- defnum_e(self)->int:
-r"""Return the number of hyperedges in the hypergraph."""
- _num_e=0
- fornameinself.group_names:
- _num_e+=len(self._raw_groups[name])
- return_num_e
-
-
[docs]defnum_e_of_group(self,group_name:str)->int:
-r"""Return the number of hyperedges in the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- returnlen(self._raw_groups[group_name])
-
- @property
- defnum_groups(self)->int:
-r"""Return the number of hyperedge groups in the hypergraph."""
- returnlen(self._raw_groups)
-
- @property
- defgroup_names(self)->List[str]:
-r"""Return the names of hyperedge groups in the hypergraph."""
- returnlist(self._raw_groups.keys())
-
- # properties for deep learning
- @property
- @abc.abstractmethod
- defvars_for_DL(self)->List[str]:
-r"""Return a name list of available variables for deep learning in this type of hypergraph.
- """
-
- @property
- defW_v(self)->torch.Tensor:
-r"""Return the vertex weight matrix of the hypergraph."""
- ifself.cache["W_v"]isNone:
- self.cache["W_v"]=torch.tensor(
- self.v_weight,dtype=torch.float,device=self.device
- ).view(-1,1)
- returnself.cache["W_v"]
-
- @property
- defW_e(self)->torch.Tensor:
-r"""Return the hyperedge weight matrix of the hypergraph."""
- ifself.cache["W_e"]isNone:
- _tmp=[self.W_e_of_group(name)fornameinself.group_names]
- self.cache["W_e"]=torch.cat(_tmp,dim=0)
- returnself.cache["W_e"]
-
-
[docs]defW_e_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the hyperedge weight matrix of the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.group_cache[group_name]["W_e"]isNone:
- self.group_cache[group_name]["W_e"]=self._fetch_W_of_group(group_name)
- returnself.group_cache[group_name]["W_e"]
-
- @property
- @abc.abstractmethod
- defH(self)->torch.Tensor:
-r"""Return the hypergraph incidence matrix."""
-
- @property
- @abc.abstractmethod
- defH_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the hypergraph incidence matrix in the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
-
- @property
- defH_v2e(self)->torch.Tensor:
-r"""Return the hypergraph incidence matrix with ``sparse matrix`` format."""
- ifself.cache.get("H_v2e")isNone:
- _tmp=[self.H_v2e_of_group(name)fornameinself.group_names]
- self.cache["H_v2e"]=torch.cat(_tmp,dim=1)
- returnself.cache["H_v2e"]
-
-
[docs]defH_v2e_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the hypergraph incidence matrix with ``sparse matrix`` format in the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.group_cache[group_name].get("H_v2e")isNone:
- self.group_cache[group_name]["H_v2e"]=self._fetch_H_of_group(
- "v2e",group_name
- )
- returnself.group_cache[group_name]["H_v2e"]
[docs]defH_e2v_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the hypergraph incidence matrix with ``sparse matrix`` format in the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.group_cache[group_name].get("H_e2v")isNone:
- self.group_cache[group_name]["H_e2v"]=self._fetch_H_of_group(
- "e2v",group_name
- )
- returnself.group_cache[group_name]["H_e2v"]
-
- @property
- defR_v2e(self)->torch.Tensor:
-r"""Return the weight matrix of connections (vertices point to hyperedges) with ``sparse matrix`` format.
- """
- ifself.cache.get("R_v2e")isNone:
- _tmp=[self.R_v2e_of_group(name)fornameinself.group_names]
- self.cache["R_v2e"]=torch.cat(_tmp,dim=1)
- returnself.cache["R_v2e"]
-
-
[docs]defR_v2e_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the weight matrix of connections (vertices point to hyperedges) with ``sparse matrix`` format in the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.group_cache[group_name].get("R_v2e")isNone:
- self.group_cache[group_name]["R_v2e"]=self._fetch_R_of_group(
- "v2e",group_name
- )
- returnself.group_cache[group_name]["R_v2e"]
-
- @property
- defR_e2v(self)->torch.Tensor:
-r"""Return the weight matrix of connections (hyperedges point to vertices) with ``sparse matrix`` format.
- """
- ifself.cache.get("R_e2v")isNone:
- _tmp=[self.R_e2v_of_group(name)fornameinself.group_names]
- self.cache["R_e2v"]=torch.cat(_tmp,dim=1)
- returnself.cache["R_e2v"]
-
-
[docs]defR_e2v_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the weight matrix of connections (hyperedges point to vertices) with ``sparse matrix`` format in the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.group_cache[group_name].get("R_e2v")isNone:
- self.group_cache[group_name]["R_e2v"]=self._fetch_R_of_group(
- "e2v",group_name
- )
- returnself.group_cache[group_name]["R_e2v"]
-
- # spectral-based smoothing
-
[docs]defsmoothing(self,X:torch.Tensor,L:torch.Tensor,lamb:float)->torch.Tensor:
-r"""Spectral-based smoothing.
-
- .. math::
- X_{smoothed} = X + \lambda \mathcal{L} X
-
- Args:
- ``X`` (``torch.Tensor``): The vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``L`` (``torch.Tensor``): The Laplacian matrix with ``torch.sparse_coo_tensor`` format. Size :math:`(|\mathcal{V}|, |\mathcal{V}|)`.
- ``lamb`` (``float``): :math:`\lambda`, the strength of smoothing.
- """
- returnX+lamb*torch.sparse.mm(L,X)
-
- # message passing functions
-
[docs]@abc.abstractmethod
- defv2e_aggregation(
- self,
- X:torch.Tensor,
- aggr:str="mean",
- v2e_weight:Optional[torch.Tensor]=None,
- drop_rate:float=0.0,
- ):
-r"""Message aggretation step of ``vertices to hyperedges``.
-
- Args:
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``v2e_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (vertices point to hyepredges). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
-
-
[docs]@abc.abstractmethod
- defv2e_aggregation_of_group(
- self,
- group_name:str,
- X:torch.Tensor,
- aggr:str="mean",
- v2e_weight:Optional[torch.Tensor]=None,
- drop_rate:float=0.0,
- ):
-r"""Message aggregation step of ``vertices to hyperedges`` in specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``v2e_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (vertices point to hyepredges). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
-
-
[docs]@abc.abstractmethod
- defv2e_update(self,X:torch.Tensor,e_weight:Optional[torch.Tensor]=None):
-r"""Message update step of ``vertices to hyperedges``.
-
- Args:
- ``X`` (``torch.Tensor``): Hyperedge feature matrix. Size :math:`(|\mathcal{E}|, C)`.
- ``e_weight`` (``torch.Tensor``, optional): The hyperedge weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
-
-
[docs]@abc.abstractmethod
- defv2e_update_of_group(
- self,group_name:str,X:torch.Tensor,e_weight:Optional[torch.Tensor]=None
- ):
-r"""Message update step of ``vertices to hyperedges`` in specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Hyperedge feature matrix. Size :math:`(|\mathcal{E}|, C)`.
- ``e_weight`` (``torch.Tensor``, optional): The hyperedge weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
-
-
[docs]@abc.abstractmethod
- defv2e(
- self,
- X:torch.Tensor,
- aggr:str="mean",
- v2e_weight:Optional[torch.Tensor]=None,
- e_weight:Optional[torch.Tensor]=None,
- ):
-r"""Message passing of ``vertices to hyperedges``. The combination of ``v2e_aggregation`` and ``v2e_update``.
-
- Args:
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``v2e_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (vertices point to hyepredges). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``e_weight`` (``torch.Tensor``, optional): The hyperedge weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
-
-
[docs]@abc.abstractmethod
- defv2e_of_group(
- self,
- group_name:str,
- X:torch.Tensor,
- aggr:str="mean",
- v2e_weight:Optional[torch.Tensor]=None,
- e_weight:Optional[torch.Tensor]=None,
- ):
-r"""Message passing of ``vertices to hyperedges`` in specified hyperedge group. The combination of ``e2v_aggregation_of_group`` and ``e2v_update_of_group``.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``v2e_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (vertices point to hyepredges). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``e_weight`` (``torch.Tensor``, optional): The hyperedge weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
-
-
[docs]@abc.abstractmethod
- defe2v_aggregation(
- self,
- X:torch.Tensor,
- aggr:str="mean",
- e2v_weight:Optional[torch.Tensor]=None,
- ):
-r"""Message aggregation step of ``hyperedges to vertices``.
-
- Args:
- ``X`` (``torch.Tensor``): Hyperedge feature matrix. Size :math:`(|\mathcal{E}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``e2v_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (hyperedges point to vertices). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
-
-
[docs]@abc.abstractmethod
- defe2v_aggregation_of_group(
- self,
- group_name:str,
- X:torch.Tensor,
- aggr:str="mean",
- e2v_weight:Optional[torch.Tensor]=None,
- ):
-r"""Message aggregation step of ``hyperedges to vertices`` in specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Hyperedge feature matrix. Size :math:`(|\mathcal{E}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``e2v_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (hyperedges point to vertices). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
-
-
[docs]@abc.abstractmethod
- defe2v_update(self,X:torch.Tensor,v_weight:Optional[torch.Tensor]=None):
-r"""Message update step of ``hyperedges to vertices``.
-
- Args:
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``v_weight`` (``torch.Tensor``, optional): The vertex weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
-
-
[docs]@abc.abstractmethod
- defe2v_update_of_group(
- self,group_name:str,X:torch.Tensor,v_weight:Optional[torch.Tensor]=None
- ):
-r"""Message update step of ``hyperedges to vertices`` in specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``v_weight`` (``torch.Tensor``, optional): The vertex weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
-
-
[docs]@abc.abstractmethod
- defe2v(
- self,
- X:torch.Tensor,
- aggr:str="mean",
- e2v_weight:Optional[torch.Tensor]=None,
- v_weight:Optional[torch.Tensor]=None,
- ):
-r"""Message passing of ``hyperedges to vertices``. The combination of ``e2v_aggregation`` and ``e2v_update``.
-
- Args:
- ``X`` (``torch.Tensor``): Hyperedge feature matrix. Size :math:`(|\mathcal{E}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``e2v_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (hyperedges point to vertices). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``v_weight`` (``torch.Tensor``, optional): The vertex weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
-
-
[docs]@abc.abstractmethod
- defe2v_of_group(
- self,
- group_name:str,
- X:torch.Tensor,
- aggr:str="mean",
- e2v_weight:Optional[torch.Tensor]=None,
- v_weight:Optional[torch.Tensor]=None,
- ):
-r"""Message passing of ``hyperedges to vertices`` in specified hyperedge group. The combination of ``e2v_aggregation_of_group`` and ``e2v_update_of_group``.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Hyperedge feature matrix. Size :math:`(|\mathcal{E}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``e2v_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (hyperedges point to vertices). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``v_weight`` (``torch.Tensor``, optional): The vertex weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
-
-
[docs]@abc.abstractmethod
- defv2v(
- self,
- X:torch.Tensor,
- aggr:str="mean",
- v2e_aggr:Optional[str]=None,
- v2e_weight:Optional[torch.Tensor]=None,
- e_weight:Optional[torch.Tensor]=None,
- e2v_aggr:Optional[str]=None,
- e2v_weight:Optional[torch.Tensor]=None,
- v_weight:Optional[torch.Tensor]=None,
- ):
-r"""Message passing of ``vertices to vertices``. The combination of ``v2e`` and ``e2v``.
-
- Args:
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``. If specified, this ``aggr`` will be used to both ``v2e`` and ``e2v``.
- ``v2e_aggr`` (``str``, optional): The aggregation method for hyperedges to vertices. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``. If specified, it will override the ``aggr`` in ``e2v``.
- ``v2e_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (vertices point to hyepredges). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``e_weight`` (``torch.Tensor``, optional): The hyperedge weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``e2v_aggr`` (``str``, optional): The aggregation method for vertices to hyperedges. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``. If specified, it will override the ``aggr`` in ``v2e``.
- ``e2v_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (hyperedges point to vertices). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``v_weight`` (``torch.Tensor``, optional): The vertex weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
-
-
[docs]@abc.abstractmethod
- defv2v_of_group(
- self,
- group_name:str,
- X:torch.Tensor,
- aggr:str="mean",
- v2e_aggr:Optional[str]=None,
- v2e_weight:Optional[torch.Tensor]=None,
- e_weight:Optional[torch.Tensor]=None,
- e2v_aggr:Optional[str]=None,
- e2v_weight:Optional[torch.Tensor]=None,
- v_weight:Optional[torch.Tensor]=None,
- ):
-r"""Message passing of ``vertices to vertices`` in specified hyperedge group. The combination of ``v2e_of_group`` and ``e2v_of_group``.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``. If specified, this ``aggr`` will be used to both ``v2e_of_group`` and ``e2v_of_group``.
- ``v2e_aggr`` (``str``, optional): The aggregation method for hyperedges to vertices. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``. If specified, it will override the ``aggr`` in ``e2v_of_group``.
- ``v2e_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (vertices point to hyepredges). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``e_weight`` (``torch.Tensor``, optional): The hyperedge weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``e2v_aggr`` (``str``, optional): The aggregation method for vertices to hyperedges. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``. If specified, it will override the ``aggr`` in ``v2e_of_group``.
- ``e2v_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (hyperedges point to vertices). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``v_weight`` (``torch.Tensor``, optional): The vertex weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
[docs]classDiGraph(Graph):
-"""
- Base class for directed graphs.
-
- Nodes are allowed for any hashable Python objects, including int, string, dict, etc.
- Edges are stored as Python dict type, with optional key/value attributes.
-
- Parameters
- ----------
- graph_attr : keywords arguments, optional (default : None)
- Attributes to add to graph as key=value pairs.
-
- See Also
- --------
- Graph
-
- Examples
- --------
- Create an empty directed graph with no nodes and edges.
-
- >>> G = eg.Graph()
-
- Create a deep copy graph *G2* from existing Graph *G1*.
-
- >>> G2 = G1.copy()
-
- Create an graph with attributes.
-
- >>> G = eg.Graph(name='Karate Club', date='2020.08.21')
-
- **Attributes:**
-
- Returns the adjacency matrix of the graph.
-
- >>> G.adj
-
- Returns all the nodes with their attributes.
-
- >>> G.nodes
-
- Returns all the edges with their attributes.
-
- >>> G.edges
-
- """
-
- gnn_data_dict_factory=dict
- graph_attr_dict_factory=dict
- node_dict_factory=dict
- node_attr_dict_factory=dict
- adjlist_outer_dict_factory=dict
- adjlist_inner_dict_factory=dict
- edge_attr_dict_factory=dict
-
- def__init__(self,incoming_graph_data=None,**graph_attr):
- self.graph=self.graph_attr_dict_factory()
- self._ndata=self.gnn_data_dict_factory()
- self._node=self.node_dict_factory()
- self._adj=self.adjlist_outer_dict_factory()
- self._pred=self.adjlist_outer_dict_factory()
- self.cflag=0
- ifincoming_graph_dataisnotNone:
- convert.to_easygraph_graph(incoming_graph_data,create_using=self)
- self.graph.update(graph_attr)
-
- def__iter__(self):
- returniter(self._node)
-
- def__len__(self):
- returnlen(self._node)
-
- def__contains__(self,node):
- try:
- returnnodeinself._node
- exceptTypeError:
- returnFalse
-
- def__getitem__(self,node):
- # return list(self._adj[node].keys())
- returnself._adj[node]
-
- @property
- defndata(self):
- returnself._ndata
-
- @property
- defpred(self):
- returnself._pred
-
- @property
- defadj(self):
- returnself._adj
-
- @property
- defnodes(self):
- returnself._node
- # return [node for node in self._node]
-
- @property
- defedges(self):
- edges=list()
- foruinself._adj:
- forvinself._adj[u]:
- edges.append((u,v,self._adj[u][v]))
- returnedges
-
- @property
- defname(self):
-"""String identifier of the graph.
-
- This graph attribute appears in the attribute dict G.graph
- keyed by the string `"name"`. as well as an attribute (technically
- a property) `G.name`. This is entirely user controlled.
- """
- returnself.graph.get("name","")
-
- @name.setter
- defname(self,s):
- self.graph["name"]=s
-
-
[docs]defout_degree(self,weight="weight"):
-"""Returns the weighted out degree of each node.
-
- Parameters
- ----------
- weight : string, optional (default : 'weight')
- Weight key of the original weighted graph.
-
- Returns
- -------
- out_degree : dict
- Each node's (key) weighted out degree (value).
-
- Notes
- -----
- If the graph is not weighted, all the weights will be regarded as 1.
-
- See Also
- --------
- in_degree
- degree
-
- Examples
- --------
-
- >>> G.out_degree(weight='weight')
-
- """
- degree=dict()
- foru,v,dinself.edges:
- ifuindegree:
- degree[u]+=d.get(weight,1)
- else:
- degree[u]=d.get(weight,1)
-
- # For isolated nodes
- fornodeinself.nodes:
- ifnodenotindegree:
- degree[node]=0
-
- returndegree
-
-
[docs]defin_degree(self,weight="weight"):
-"""Returns the weighted in degree of each node.
-
- Parameters
- ----------
- weight : string, optional (default : 'weight')
- Weight key of the original weighted graph.
-
- Returns
- -------
- in_degree : dict
- Each node's (key) weighted in degree (value).
-
- Notes
- -----
- If the graph is not weighted, all the weights will be regarded as 1.
-
- See Also
- --------
- out_degree
- degree
-
- Examples
- --------
-
- >>> G.in_degree(weight='weight')
-
- """
- degree=dict()
- foru,v,dinself.edges:
- ifvindegree:
- degree[v]+=d.get(weight,1)
- else:
- degree[v]=d.get(weight,1)
-
- # For isolated nodes
- fornodeinself.nodes:
- ifnodenotindegree:
- degree[node]=0
-
- returndegree
-
-
[docs]defdegree(self,weight="weight"):
-"""Returns the weighted degree of each node, i.e. sum of out/in degree.
-
- Parameters
- ----------
- weight : string, optional (default : 'weight')
- Weight key of the original weighted graph.
-
- Returns
- -------
- degree : dict
- Each node's (key) weighted in degree (value).
- For directed graph, it returns the sum of out degree and in degree.
-
- Notes
- -----
- If the graph is not weighted, all the weights will be regarded as 1.
-
- See also
- --------
- out_degree
- in_degree
-
- Examples
- --------
-
- >>> G.degree()
- >>> G.degree(weight='weight')
-
- or you can customize the weight key
-
- >>> G.degree(weight='weight_1')
-
- """
- degree=dict()
- outdegree=self.out_degree(weight=weight)
- indegree=self.in_degree(weight=weight)
- foruinoutdegree:
- degree[u]=outdegree[u]+indegree[u]
- returndegree
-
-
[docs]defsize(self,weight=None):
-"""Returns the number of edges or total of all edge weights.
-
- Parameters
- -----------
- weight : String or None, optional
- The weight key. If None, it will calculate the number of
- edges, instead of total of all edge weights.
-
- Returns
- -------
- size : int or float, optional (default: None)
- The number of edges or total of all edge weights.
-
- Examples
- --------
-
- Returns the number of edges in G:
-
- >>> G.size()
-
- Returns the total of all edge weights in G:
-
- >>> G.size(weight='weight')
-
- """
- s=sum(dforv,dinself.out_degree(weight=weight).items())
- returnint(s)ifweightisNoneelses
-
-
[docs]defnumber_of_edges(self,u=None,v=None):
-"""Returns the number of edges between two nodes.
-
- Parameters
- ----------
- u, v : nodes, optional (default=all edges)
- If u and v are specified, return the number of edges between
- u and v. Otherwise return the total number of all edges.
-
- Returns
- -------
- nedges : int
- The number of edges in the graph. If nodes `u` and `v` are
- specified return the number of edges between those nodes. If
- the graph is directed, this only returns the number of edges
- from `u` to `v`.
-
- See Also
- --------
- size
-
- Examples
- --------
- For undirected graphs, this method counts the total number of
- edges in the graph:
-
- >>> G = eg.path_graph(4)
- >>> G.number_of_edges()
- 3
-
- If you specify two nodes, this counts the total number of edges
- joining the two nodes:
-
- >>> G.number_of_edges(0, 1)
- 1
-
- For directed graphs, this method can count the total number of
- directed edges from `u` to `v`:
-
- >>> G = eg.DiGraph()
- >>> G.add_edge(0, 1)
- >>> G.add_edge(1, 0)
- >>> G.number_of_edges(0, 1)
- 1
-
- """
- ifuisNone:
- returnint(self.size())
- ifvinself._adj[u]:
- return1
- return0
-
-
[docs]defnbunch_iter(self,nbunch=None):
-"""Returns an iterator over nodes contained in nbunch that are
- also in the graph.
-
- The nodes in nbunch are checked for membership in the graph
- and if not are silently ignored.
-
- Parameters
- ----------
- nbunch : single node, container, or all nodes (default= all nodes)
- The view will only report edges incident to these nodes.
-
- Returns
- -------
- niter : iterator
- An iterator over nodes in nbunch that are also in the graph.
- If nbunch is None, iterate over all nodes in the graph.
-
- Raises
- ------
- EasyGraphError
- If nbunch is not a node or sequence of nodes.
- If a node in nbunch is not hashable.
-
- See Also
- --------
- Graph.__iter__
-
- Notes
- -----
- When nbunch is an iterator, the returned iterator yields values
- directly from nbunch, becoming exhausted when nbunch is exhausted.
-
- To test whether nbunch is a single node, one can use
- "if nbunch in self:", even after processing with this routine.
-
- If nbunch is not a node or a (possibly empty) sequence/iterator
- or None, a :exc:`EasyGraphError` is raised. Also, if any object in
- nbunch is not hashable, a :exc:`EasyGraphError` is raised.
- """
- ifnbunchisNone:# include all nodes via iterator
- bunch=iter(self._adj)
- elifnbunchinself:# if nbunch is a single node
- bunch=iter([nbunch])
- else:# if nbunch is a sequence of nodes
-
- defbunch_iter(nlist,adj):
- try:
- forninnlist:
- ifninadj:
- yieldn
- exceptTypeErroraserr:
- exc,message=err,err.args[0]
- # capture error for non-sequence/iterator nbunch.
- if"iter"inmessage:
- exc=EasyGraphError(
- "nbunch is not a node or a sequence of nodes."
- )
- # capture error for unhashable node.
- if"hashable"inmessage:
- exc=EasyGraphError(
- f"Node {n} in sequence nbunch is not a valid node."
- )
- raiseexc
-
- bunch=bunch_iter(nbunch,self._adj)
- returnbunch
-
-
[docs]defneighbors(self,node):
-"""Returns an iterator of a node's neighbors (successors).
-
- Parameters
- ----------
- node : Hashable
- The target node.
-
- Returns
- -------
- neighbors : iterator
- An iterator of a node's neighbors (successors).
-
- Examples
- --------
- >>> G = eg.Graph()
- >>> G.add_edges([(1,2), (2,3), (2,4)])
- >>> for neighbor in G.neighbors(node=2):
- ... print(neighbor)
-
- """
- # successors
- try:
- returniter(self._adj[node])
- exceptKeyError:
- print("No node {}".format(node))
-
- successors=neighbors
-
-
[docs]defpredecessors(self,node):
-"""Returns an iterator of a node's neighbors (predecessors).
-
- Parameters
- ----------
- node : Hashable
- The target node.
-
- Returns
- -------
- neighbors : iterator
- An iterator of a node's neighbors (predecessors).
-
- Examples
- --------
- >>> G = eg.Graph()
- >>> G.add_edges([(1,2), (2,3), (2,4)])
- >>> for predecessor in G.predecessors(node=2):
- ... print(predecessor)
-
- """
- # predecessors
- try:
- returniter(self._pred[node])
- exceptKeyError:
- print("No node {}".format(node))
-
-
[docs]defall_neighbors(self,node):
-"""Returns an iterator of a node's neighbors, including both successors and predecessors.
-
- Parameters
- ----------
- node : Hashable
- The target node.
-
- Returns
- -------
- neighbors : iterator
- An iterator of a node's neighbors, including both successors and predecessors.
-
- Examples
- --------
- >>> G = eg.Graph()
- >>> G.add_edges([(1,2), (2,3), (2,4)])
- >>> for neighbor in G.all_neighbors(node=2):
- ... print(neighbor)
-
- """
- # union of successors and predecessors
- try:
- neighbors=list(self._adj[node])
- neighbors.extend(self._pred[node])
- returniter(neighbors)
- exceptKeyError:
- print("No node {}".format(node))
-
-
[docs]defadd_node(self,node_for_adding,**node_attr):
-"""Add one node
-
- Add one node, type of which is any hashable Python object, such as int, string, dict, or even Graph itself.
- You can add with node attributes using Python dict type.
-
- Parameters
- ----------
- node_for_adding : any hashable Python object
- Nodes for adding.
-
- node_attr : keywords arguments, optional
- The node attributes.
- You can customize them with different key-value pairs.
-
- See Also
- --------
- add_nodes
-
- Examples
- --------
- >>> G.add_node('a')
- >>> G.add_node('hello world')
- >>> G.add_node('Jack', age=10)
-
- >>> G.add_node('Jack', **{
- ... 'age': 10,
- ... 'gender': 'M'
- ... })
-
- """
- self._add_one_node(node_for_adding,node_attr)
-
-
[docs]defadd_nodes(self,nodes_for_adding:list,nodes_attr:List[Dict]=[]):
-"""Add nodes with a list of nodes.
-
- Parameters
- ----------
- nodes_for_adding : list
-
- nodes_attr : list of dict
- The corresponding attribute for each of *nodes_for_adding*.
-
- See Also
- --------
- add_node
-
- Examples
- --------
- Add nodes with a list of nodes.
- You can add with node attributes using a list of Python dict type,
- each of which is the attribute of each node, respectively.
-
- >>> G.add_nodes([1, 2, 'a', 'b'])
- >>> G.add_nodes(range(1, 200))
-
- >>> G.add_nodes(['Jack', 'Tom', 'Lily'], nodes_attr=[
- ... {
- ... 'age': 10,
- ... 'gender': 'M'
- ... },
- ... {
- ... 'age': 11,
- ... 'gender': 'M'
- ... },
- ... {
- ... 'age': 10,
- ... 'gender': 'F'
- ... }
- ... ])
-
- """
- ifnodes_attrisNone:
- nodes_attr=[]
- ifnotlen(nodes_attr)==0:# Nodes attributes included in input
- assertlen(nodes_for_adding)==len(
- nodes_attr
- ),"Nodes and Attributes lists must have same length."
- else:# Set empty attribute for each node
- nodes_attr=[dict()foriinrange(len(nodes_for_adding))]
-
- foriinrange(len(nodes_for_adding)):
- try:
- self._add_one_node(nodes_for_adding[i],nodes_attr[i])
- exceptExceptionaserr:
- print(err)
- pass
-
-
[docs]defadd_nodes_from(self,nodes_for_adding,**attr):
-"""Add multiple nodes.
-
- Parameters
- ----------
- nodes_for_adding : iterable container
- A container of nodes (list, dict, set, etc.).
- OR
- A container of (node, attribute dict) tuples.
- Node attributes are updated using the attribute dict.
- attr : keyword arguments, optional (default= no attributes)
- Update attributes for all nodes in nodes.
- Node attributes specified in nodes as a tuple take
- precedence over attributes specified via keyword arguments.
-
- See Also
- --------
- add_node
-
- Examples
- --------
- >>> G = eg.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc
- >>> G.add_nodes_from("Hello")
- >>> K3 = eg.Graph([(0, 1), (1, 2), (2, 0)])
- >>> G.add_nodes_from(K3)
- >>> sorted(G.nodes(), key=str)
- [0, 1, 2, 'H', 'e', 'l', 'o']
-
- Use keywords to update specific node attributes for every node.
-
- >>> G.add_nodes_from([1, 2], size=10)
- >>> G.add_nodes_from([3, 4], weight=0.4)
-
- Use (node, attrdict) tuples to update attributes for specific nodes.
-
- >>> G.add_nodes_from([(1, dict(size=11)), (2, {"color": "blue"})])
- >>> G.nodes[1]["size"]
- 11
- >>> H = eg.Graph()
- >>> H.add_nodes_from(G.nodes(data=True))
- >>> H.nodes[1]["size"]
- 11
-
- """
- forninnodes_for_adding:
- try:
- newnode=nnotinself._node
- newdict=attr
- exceptTypeError:
- n,ndict=n
- newnode=nnotinself._node
- newdict=attr.copy()
- newdict.update(ndict)
- ifnewnode:
- ifnisNone:
- raiseValueError("None cannot be a node")
- self._adj[n]=self.adjlist_inner_dict_factory()
- self._pred[n]=self.adjlist_inner_dict_factory()
- self._node[n]=self.node_attr_dict_factory()
- self._node[n].update(newdict)
-
- def_add_one_node(self,one_node_for_adding,node_attr:dict={}):
- node=one_node_for_adding
- ifnodenotinself._node:
- self._adj[node]=self.adjlist_inner_dict_factory()
- self._pred[node]=self.adjlist_inner_dict_factory()
-
- attr_dict=self._node[node]=self.node_attr_dict_factory()
- attr_dict.update(node_attr)
- else:# If already exists, there is no complain and still updating the node attribute
- self._node[node].update(node_attr)
-
-
[docs]defadd_edge(self,u_of_edge,v_of_edge,**edge_attr):
-"""Add a directed edge.
-
- Parameters
- ----------
- u_of_edge : object
- The start end of this edge
-
- v_of_edge : object
- The destination end of this edge
-
- edge_attr : keywords arguments, optional
- The attribute of the edge.
-
- Notes
- -----
- Nodes of this edge will be automatically added to the graph, if they do not exist.
-
- See Also
- --------
- add_edges
-
- Examples
- --------
-
- >>> G.add_edge(1,2)
- >>> G.add_edge('Jack', 'Tom', weight=10)
-
- Add edge with attributes, edge weight, for example,
-
- >>> G.add_edge(1, 2, **{
- ... 'weight': 20
- ... })
-
- """
- self._add_one_edge(u_of_edge,v_of_edge,edge_attr)
[docs]defadd_edges(self,edges_for_adding,edges_attr:List[Dict]=[]):
-"""Add a list of edges.
-
- Parameters
- ----------
- edges_for_adding : list of 2-element tuple
- The edges for adding. Each element is a (u, v) tuple, and u, v are
- start end and destination end, respectively.
-
- edges_attr : list of dict, optional
- The corresponding attributes for each edge in *edges_for_adding*.
-
- Examples
- --------
- Add a list of edges into *G*
-
- >>> G.add_edges([
- ... (1, 2),
- ... (3, 4),
- ... ('Jack', 'Tom')
- ... ])
-
- Add edge with attributes, for example, edge weight,
-
- >>> G.add_edges([(1,2), (2, 3)], edges_attr=[
- ... {
- ... 'weight': 20
- ... },
- ... {
- ... 'weight': 15
- ... }
- ... ])
-
- """
- ifedges_attrisNone:
- edges_attr=[]
- ifnotlen(edges_attr)==0:# Edges attributes included in input
- assertlen(edges_for_adding)==len(
- edges_attr
- ),"Edges and Attributes lists must have same length."
- else:# Set empty attribute for each edge
- edges_attr=[dict()foriinrange(len(edges_for_adding))]
-
- foriinrange(len(edges_for_adding)):
- try:
- edge=edges_for_adding[i]
- attr=edges_attr[i]
- assertlen(edge)==2,"Edge tuple {} must be 2-tuple.".format(edge)
- self._add_one_edge(edge[0],edge[1],attr)
- exceptExceptionaserr:
- print(err)
-
-
[docs]defadd_edges_from(self,ebunch_to_add,**attr):
-"""Add all the edges in ebunch_to_add.
-
- Parameters
- ----------
- ebunch_to_add : container of edges
- Each edge given in the container will be added to the
- graph. The edges must be given as 2-tuples (u, v) or
- 3-tuples (u, v, d) where d is a dictionary containing edge data.
- attr : keyword arguments, optional
- Edge data (or labels or objects) can be assigned using
- keyword arguments.
-
- See Also
- --------
- add_edge : add a single edge
- add_weighted_edges_from : convenient way to add weighted edges
-
- Notes
- -----
- Adding the same edge twice has no effect but any edge data
- will be updated when each duplicate edge is added.
-
- Edge attributes specified in an ebunch take precedence over
- attributes specified via keyword arguments.
-
- Examples
- --------
- >>> G = eg.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc
- >>> G.add_edges_from([(0, 1), (1, 2)]) # using a list of edge tuples
- >>> e = zip(range(0, 3), range(1, 4))
- >>> G.add_edges_from(e) # Add the path graph 0-1-2-3
-
- Associate data to edges
-
- >>> G.add_edges_from([(1, 2), (2, 3)], weight=3)
- >>> G.add_edges_from([(3, 4), (1, 4)], label="WN2898")
- """
- foreinebunch_to_add:
- ne=len(e)
- ifne==3:
- u,v,dd=e
- elifne==2:
- u,v=e
- dd={}
- else:
- raiseEasyGraphError(f"Edge tuple {e} must be a 2-tuple or 3-tuple.")
- ifunotinself._adj:
- ifuisNone:
- raiseValueError("None cannot be a node")
- self._adj[u]=self.adjlist_inner_dict_factory()
- self._pred[u]=self.adjlist_inner_dict_factory()
- self._node[u]=self.node_attr_dict_factory()
- ifvnotinself._adj:
- ifvisNone:
- raiseValueError("None cannot be a node")
- self._adj[v]=self.adjlist_inner_dict_factory()
- self._pred[v]=self.adjlist_inner_dict_factory()
- self._node[v]=self.node_attr_dict_factory()
- datadict=self._adj[u].get(v,self.edge_attr_dict_factory())
- datadict.update(attr)
- datadict.update(dd)
- self._adj[u][v]=datadict
- self._pred[v][u]=datadict
-
-
[docs]defadd_edges_from_file(self,file,weighted=False):
-"""Added edges from file
- For example, txt files,
-
- Each line is in form like:
- a b 23.0
- which denotes an edge `a → b` with weight 23.0.
-
- Parameters
- ----------
- file : string
- The file path.
-
- weighted : boolean, optional (default : False)
- If the file consists of weight information, set `True`.
- The weight key will be set as 'weight'.
-
- Examples
- --------
-
- If `./club_network.txt` is:
-
- Jack Mary 23.0
-
- Mary Tom 15.0
-
- Tom Ben 20.0
-
- Then add them to *G*
-
- >>> G.add_edges_from_file(file='./club_network.txt', weighted=True)
-
-
- """
- importre
-
- withopen(file,"r")asfp:
- edges=fp.readlines()
- ifweighted:
- foredgeinedges:
- edge=re.sub(","," ",edge)
- edge=edge.split()
- try:
- self.add_edge(edge[0],edge[1],weight=float(edge[2]))
- except:
- pass
- else:
- foredgeinedges:
- edge=re.sub(","," ",edge)
- edge=edge.split()
- try:
- self.add_edge(edge[0],edge[1])
- except:
- pass
[docs]defremove_node(self,node_to_remove):
-"""Remove one node from your graph.
-
- Parameters
- ----------
- node_to_remove : object
- The node you want to remove.
-
- See Also
- --------
- remove_nodes
-
- Examples
- --------
- Remove node *Jack* from *G*
-
- >>> G.remove_node('Jack')
-
- """
- try:
- succs=list(self._adj[node_to_remove])
- preds=list(self._pred[node_to_remove])
- delself._node[node_to_remove]
- exceptKeyError:# Node not exists in self
- raiseKeyError("No node {} in graph.".format(node_to_remove))
- forsuccinsuccs:# Remove edges start with node_to_remove
- delself._pred[succ][node_to_remove]
- forpredinpreds:# Remove edges end with node_to_remove
- delself._adj[pred][node_to_remove]
-
- # Remove this node
- delself._adj[node_to_remove]
- delself._pred[node_to_remove]
-
-
[docs]defremove_nodes(self,nodes_to_remove:list):
-"""Remove nodes from your graph.
-
- Parameters
- ----------
- nodes_to_remove : list of object
- The list of nodes you want to remove.
-
- See Also
- --------
- remove_node
-
- Examples
- --------
- Remove node *[1, 2, 'a', 'b']* from *G*
-
- >>> G.remove_nodes([1, 2, 'a', 'b'])
-
- """
- for(
- node
- )in(
- nodes_to_remove
- ):# If not all nodes included in graph, give up removing other nodes
- assertnodeinself._node,"Remove Error: No node {} in graph".format(node)
- fornodeinnodes_to_remove:
- self.remove_node(node)
-
-
[docs]defremove_edge(self,u,v):
-"""Remove one edge from your graph.
-
- Parameters
- ----------
- u : object
- The start end of the edge.
-
- v : object
- The destination end of the edge.
-
- See Also
- --------
- remove_edges
-
- Examples
- --------
- Remove edge (1,2) from *G*
-
- >>> G.remove_edge(1,2)
-
- """
- try:
- delself._adj[u][v]
- delself._pred[v][u]
- exceptKeyError:
- raiseKeyError("No edge {}-{} in graph.".format(u,v))
-
-
[docs]defremove_edges(self,edges_to_remove:[tuple]):
-"""Remove a list of edges from your graph.
-
- Parameters
- ----------
- edges_to_remove : list of tuple
- The list of edges you want to remove,
- Each element is (u, v) tuple, which denote the start and destination
- end of the edge, respectively.
-
- See Also
- --------
- remove_edge
-
- Examples
- --------
- Remove the edges *('Jack', 'Mary')* amd *('Mary', 'Tom')* from *G*
-
- >>> G.remove_edge([
- ... ('Jack', 'Mary'),
- ... ('Mary', 'Tom')
- ... ])
-
- """
- foredgeinedges_to_remove:
- u,v=edge[:2]
- self.remove_edge(u,v)
-
-
[docs]defremove_edges_from(self,ebunch):
-"""Remove all edges specified in ebunch.
-
- Parameters
- ----------
- ebunch: list or container of edge tuples
- Each edge given in the list or container will be removed
- from the graph. The edges can be:
-
- - 2-tuples (u, v) edge between u and v.
- - 3-tuples (u, v, k) where k is ignored.
-
- See Also
- --------
- remove_edge : remove a single edge
-
- Notes
- -----
- Will fail silently if an edge in ebunch is not in the graph.
-
- Examples
- --------
- >>> G = eg.path_graph(4) # or DiGraph, MultiGraph, MultiDiGraph, etc
- >>> ebunch = [(1, 2), (2, 3)]
- >>> G.remove_edges_from(ebunch)
- """
- foreinebunch:
- u,v=e[:2]# ignore edge data
- ifuinself._adjandvinself._adj[u]:
- delself._adj[u][v]
- delself._pred[v][u]
[docs]defnumber_of_nodes(self):
-"""Returns the number of nodes.
-
- Returns
- -------
- number_of_nodes : int
- The number of nodes.
- """
- returnlen(self._node)
[docs]defis_multigraph(self):
-"""Returns True if graph is a multigraph, False otherwise."""
- returnFalse
-
-
[docs]defcopy(self):
-"""Return a deep copy of the graph.
-
- Returns
- -------
- copy : easygraph.DiGraph
- A deep copy of the original graph.
-
- Examples
- --------
- *G2* is a deep copy of *G1*
-
- >>> G2 = G1.copy()
-
- """
- G=self.__class__()
- G.graph.update(self.graph)
- fornode,node_attrinself._node.items():
- G.add_node(node,**node_attr)
- foru,nbrsinself._adj.items():
- forv,edge_datainnbrs.items():
- G.add_edge(u,v,**edge_data)
-
- returnG
-
-
[docs]defnodes_subgraph(self,from_nodes:list):
-"""Returns a subgraph of some nodes
-
- Parameters
- ----------
- from_nodes : list of object
- The nodes in subgraph.
-
- Returns
- -------
- nodes_subgraph : easygraph.Graph
- The subgraph consisting of *from_nodes*.
-
- Examples
- --------
-
- >>> G = eg.Graph()
- >>> G.add_edges([(1,2), (2,3), (2,4), (4,5)])
- >>> G_sub = G.nodes_subgraph(from_nodes= [1,2,3])
-
- """
- # Edge
- from_nodes=set(from_nodes)
- G=self.__class__()
- G.graph.update(self.graph)
- from_nodes=set(from_nodes)
- fornodeinfrom_nodes:
- try:
- G.add_node(node,**self._node[node])
- exceptKeyError:
- pass
-
- forv,edge_datainself._adj[node].items():
- ifvinfrom_nodes:
- G.add_edge(node,v,**edge_data)
- returnG
-
-
[docs]defego_subgraph(self,center):
-"""Returns an ego network graph of a node.
-
- Parameters
- ----------
- center : object
- The center node of the ego network graph
-
- Returns
- -------
- ego_subgraph : easygraph.Graph
- The ego network graph of *center*.
-
-
- Examples
- --------
- >>> G = eg.Graph()
- >>> G.add_edges([
- ... ('Jack', 'Maria'),
- ... ('Maria', 'Andy'),
- ... ('Jack', 'Tom')
- ... ])
- >>> G.ego_subgraph(center='Jack')
- """
- neighbors_of_center=list(self.all_neighbors(center))
- neighbors_of_center.append(center)
- returnself.nodes_subgraph(from_nodes=neighbors_of_center)
-
-
[docs]defto_index_node_graph(self,begin_index=0):
-"""Returns a deep copy of graph, with each node switched to its index.
-
- Considering that the nodes of your graph may be any possible hashable Python object,
- you can get an isomorphic graph of the original one, with each node switched to its index.
-
- Parameters
- ----------
- begin_index : int
- The begin index of the index graph.
-
- Returns
- -------
- G : easygraph.Graph
- Deep copy of graph, with each node switched to its index.
-
- index_of_node : dict
- Index of node
-
- node_of_index : dict
- Node of index
-
- Examples
- --------
- The following method returns this isomorphic graph and index-to-node dictionary
- as well as node-to-index dictionary.
-
- >>> G = eg.Graph()
- >>> G.add_edges([
- ... ('Jack', 'Maria'),
- ... ('Maria', 'Andy'),
- ... ('Jack', 'Tom')
- ... ])
- >>> G_index_graph, index_of_node, node_of_index = G.to_index_node_graph()
-
- """
- G=self.__class__()
- G.graph.update(self.graph)
- index_of_node=dict()
- node_of_index=dict()
- forindex,(node,node_attr)inenumerate(self._node.items()):
- G.add_node(index+begin_index,**node_attr)
- index_of_node[node]=index+begin_index
- node_of_index[index+begin_index]=node
- foru,nbrsinself._adj.items():
- forv,edge_datainnbrs.items():
- G.add_edge(index_of_node[u],index_of_node[v],**edge_data)
-
- returnG,index_of_node,node_of_index
class MultiDiGraph(MultiGraph, DiGraph):
    # Factory producing the key -> edge-data mapping stored per (u, v) pair.
    edge_key_dict_factory = dict

    def __init__(self, incoming_graph_data=None, multigraph_input=None, **attr):
        """Initialize a multi-digraph with edges, name, or graph attributes.

        Parameters
        ----------
        incoming_graph_data : input graph, optional
            Data to initialize the graph. If None (default) an empty graph is
            created. May be an edge list, an EasyGraph graph object, or (with
            the optional packages installed) a NumPy matrix/2d ndarray, a
            SciPy sparse matrix, or a PyGraphviz graph.

        multigraph_input : bool or None (default None)
            Only used when `incoming_graph_data` is a dict. If True, the dict
            is assumed to be dict-of-dict-of-dict-of-dict keyed by
            node -> neighbor -> edge key -> edge data; an EasyGraphError is
            raised if it is not. If False, :func:`to_easygraph_graph` decides
            the structure. If None, the True treatment is tried first and the
            False treatment used as fallback.

        attr : keyword arguments, optional
            Attributes to add to the graph as key=value pairs.

        See Also
        --------
        convert
        """
        # Snapshot the class-level factory onto the instance.
        self.edge_key_dict_factory = self.edge_key_dict_factory
        # multigraph_input can be None/True/False. So check "is not False"
        if isinstance(incoming_graph_data, dict) and multigraph_input is not False:
            DiGraph.__init__(self)
            try:
                convert.from_dict_of_dicts(
                    incoming_graph_data, create_using=self, multigraph_input=True
                )
                self.graph.update(attr)
            except Exception as err:
                # Strict mode: the caller promised multigraph structure.
                if multigraph_input is True:
                    raise EasyGraphError(
                        f"converting multigraph_input raised:\n{type(err)}: {err}"
                    )
                # Lenient mode (None): fall back to generic conversion.
                DiGraph.__init__(self, incoming_graph_data, **attr)
        else:
            DiGraph.__init__(self, incoming_graph_data, **attr)
-
[docs]defadd_edge(self,u_for_edge,v_for_edge,key=None,**attr):
-"""Add an edge between u and v.
-
- The nodes u and v will be automatically added if they are
- not already in the graph.
-
- Edge attributes can be specified with keywords or by directly
- accessing the edge's attribute dictionary. See examples below.
-
- Parameters
- ----------
- u_for_edge, v_for_edge : nodes
- Nodes can be, for example, strings or numbers.
- Nodes must be hashable (and not None) Python objects.
- key : hashable identifier, optional (default=lowest unused integer)
- Used to distinguish multiedges between a pair of nodes.
- attr : keyword arguments, optional
- Edge data (or labels or objects) can be assigned using
- keyword arguments.
-
- Returns
- -------
- The edge key assigned to the edge.
-
- See Also
- --------
- add_edges_from : add a collection of edges
-
- Notes
- -----
- To replace/update edge data, use the optional key argument
- to identify a unique edge. Otherwise a new edge will be created.
-
- EasyGraph algorithms designed for weighted graphs cannot use
- multigraphs directly because it is not clear how to handle
- multiedge weights. Convert to Graph using edge attribute
- 'weight' to enable weighted graph algorithms.
-
- Default keys are generated using the method `new_edge_key()`.
- This method can be overridden by subclassing the base class and
- providing a custom `new_edge_key()` method.
-
- Examples
- --------
- The following all add the edge e=(1, 2) to graph G:
-
- >>> G = eg.MultiDiGraph()
- >>> e = (1, 2)
- >>> key = G.add_edge(1, 2) # explicit two-node form
- >>> G.add_edge(*e) # single edge as tuple of two nodes
- 1
- >>> G.add_edges_from([(1, 2)]) # add edges from iterable container
- [2]
-
- Associate data to edges using keywords:
-
- >>> key = G.add_edge(1, 2, weight=3)
- >>> key = G.add_edge(1, 2, key=0, weight=4) # update data for key=0
- >>> key = G.add_edge(1, 3, weight=7, capacity=15, length=342.7)
-
- For non-string attribute keys, use subscript notation.
-
- >>> ekey = G.add_edge(1, 2)
- >>> G[1][2][0].update({0: 5})
- >>> G.edges[1, 2, 0].update({0: 5})
- """
- u,v=u_for_edge,v_for_edge
- # add nodes
- ifunotinself._adj:
- ifuisNone:
- raiseValueError("None cannot be a node")
- self._adj[u]=self.adjlist_inner_dict_factory()
- self._pred[u]=self.adjlist_inner_dict_factory()
- self._node[u]=self.node_attr_dict_factory()
- ifvnotinself._adj:
- ifvisNone:
- raiseValueError("None cannot be a node")
- self._adj[v]=self.adjlist_inner_dict_factory()
- self._pred[v]=self.adjlist_inner_dict_factory()
- self._node[v]=self.node_attr_dict_factory()
- ifkeyisNone:
- key=self.new_edge_key(u,v)
- ifvinself._adj[u]:
- keydict=self._adj[u][v]
- datadict=keydict.get(key,self.edge_key_dict_factory())
- datadict.update(attr)
- keydict[key]=datadict
- else:
- # selfloops work this way without special treatment
- datadict=self.edge_attr_dict_factory()
- datadict.update(attr)
- keydict=self.edge_key_dict_factory()
- keydict[key]=datadict
- self._adj[u][v]=keydict
- self._pred[v][u]=keydict
- returnkey
-
-
    def remove_edge(self, u, v, key=None):
        """Remove an edge between u and v.

        Parameters
        ----------
        u, v : nodes
            Remove an edge between nodes u and v.
        key : hashable identifier, optional (default=None)
            Used to distinguish multiple edges between a pair of nodes.
            If None, remove a single (arbitrary) edge between u and v.

        Raises
        ------
        EasyGraphError
            If there is no edge between u and v, or no edge with the
            specified key.

        See Also
        --------
        remove_edges_from : remove a collection of edges
        """
        try:
            d = self._adj[u][v]
        except KeyError as err:
            raise EasyGraphError(f"The edge {u}-{v} is not in the graph.") from err
        # remove the edge with specified data
        if key is None:
            # popitem() removes an arbitrary parallel edge.
            d.popitem()
        else:
            try:
                del d[key]
            except KeyError as err:
                msg = f"The edge {u}-{v} with key {key} is not in the graph."
                raise EasyGraphError(msg) from err
        if len(d) == 0:
            # remove the key entries if last edge
            del self._adj[u][v]
            del self._pred[v][u]
[docs]defis_multigraph(self):
-"""Returns True if graph is a multigraph, False otherwise."""
- returnTrue
-
-
[docs]defis_directed(self):
-"""Returns True if graph is directed, False otherwise."""
- returnTrue
-
-
    def to_undirected(self, reciprocal=False):
        """Returns an undirected representation of the multidigraph.

        Parameters
        ----------
        reciprocal : bool (optional)
            If True only keep edges that appear in both directions
            in the original digraph.

        Returns
        -------
        G : MultiGraph
            An undirected graph with the same name and nodes, and with edge
            (u, v, data) if either (u, v, data) or (v, u, data) is in the
            digraph. If both directions exist with different data, one is
            chosen arbitrarily — check and correct manually if needed.

        See Also
        --------
        MultiGraph, add_edge, add_edges_from

        Notes
        -----
        This returns a deepcopy of the edge, node, and graph attributes, in
        contrast to D=MultiDiGraph(G) which returns a shallow copy.

        Warning: subclassed dict-like data structures do not transfer to the
        MultiGraph created by this method.

        Examples
        --------
        >>> G = eg.path_graph(2)
        >>> H = G.to_directed()
        >>> G2 = H.to_undirected()
        >>> list(G2.edges)
        [(0, 1)]
        """
        G = eg.MultiGraph()
        G.graph.update(deepcopy(self.graph))
        G.add_nodes_from((n, deepcopy(d)) for n, d in self._node.items())
        if reciprocal is True:
            # Keep (u, v, key) only when the reverse edge with the same key
            # exists (checked via the predecessor mapping).
            G.add_edges_from(
                (u, v, key, deepcopy(data))
                for u, nbrs in self._adj.items()
                for v, keydict in nbrs.items()
                for key, data in keydict.items()
                if v in self._pred[u] and key in self._pred[u][v]
            )
        else:
            G.add_edges_from(
                (u, v, key, deepcopy(data))
                for u, nbrs in self._adj.items()
                for v, keydict in nbrs.items()
                for key, data in keydict.items()
            )
        return G
-
-
[docs]defreverse(self,copy=True):
-"""Returns the reverse of the graph.
-
- The reverse is a graph with the same nodes and edges
- but with the directions of the edges reversed.
-
- Parameters
- ----------
- copy : bool optional (default=True)
- If True, return a new DiGraph holding the reversed edges.
- If False, the reverse graph is created using a view of
- the original graph.
- """
- ifcopy:
- H=self.__class__()
- H.graph.update(deepcopy(self.graph))
- H.add_nodes_from((n,deepcopy(d))forn,dinself._node.items())
- H.add_edges_from(
- (v,u,k,deepcopy(d))
- foru,v,k,dinself.edges(keys=True,data=True)
- )
- returnH
- returneg.graphviews.reverse_view(self)
class Graph:
    """
    Base class for undirected graphs.

    Nodes may be any hashable Python objects, including int, string, dict, etc.
    Edges are stored as Python dict type, with optional key/value attributes.

    Parameters
    ----------
    graph_attr : keywords arguments, optional (default : None)
        Attributes to add to graph as key=value pairs.

    See Also
    --------
    DiGraph

    Examples
    --------
    Create an empty undirected graph with no nodes and edges.

    >>> G = eg.Graph()

    Create a graph with attributes.

    >>> G = eg.Graph(name='Karate Club', date='2020.08.21')

    **Attributes:**

    >>> G.adj    # adjacency structure
    >>> G.nodes  # nodes with their attributes
    >>> G.edges  # edges with their attributes
    """

    # Factory callables — all plain dicts here; subclasses may override them
    # to customize storage types.
    gnn_data_dict_factory = dict
    raw_selfloop_dict = dict
    graph_attr_dict_factory = dict
    node_dict_factory = dict
    node_attr_dict_factory = dict
    adjlist_outer_dict_factory = dict
    adjlist_inner_dict_factory = dict
    edge_attr_dict_factory = dict
    node_index_dict = dict

    def __init__(self, incoming_graph_data=None, extra_selfloop=False, **graph_attr):
        self.graph = self.graph_attr_dict_factory()    # graph-level attributes
        self._node = self.node_dict_factory()          # node -> attr dict
        self._adj = self.adjlist_outer_dict_factory()  # node -> neighbor -> edge attrs
        self._ndata = self.gnn_data_dict_factory()     # GNN feature storage
        self._raw_selfloop_dict = self.raw_selfloop_dict()
        self.extra_selfloop = extra_selfloop
        self.cache = {}                                # memoized derived data
        self._node_index = self.node_index_dict()      # node -> integer index
        self.cflag = 0
        self._id = 0                                   # next integer index to assign
        self.device = "cpu"
        if incoming_graph_data is not None:
            convert.to_easygraph_graph(incoming_graph_data, create_using=self)
        self.graph.update(graph_attr)
-
- def__iter__(self):
- returniter(self._node)
-
- def__len__(self):
- returnlen(self._node)
-
- def__contains__(self,node):
- try:
- returnnodeinself._node
- exceptTypeError:
- returnFalse
-
- def__getitem__(self,node):
- # return list(self._adj[node].keys())
- returnself._adj[node]
-
- @property
- defndata(self):
- returnself._ndata
-
- @property
- defadj(self):
- returnself._adj
-
- @property
- defnodes(self):
- returnself._node
- # return [node for node in self._node]
-
- @property
- defnode_index(self):
- returnself._node_index
-
- @property
- defnode_index(self):
- returnself._node_index
-
- @property
- defedges(self):
- ifself.cache.get("edges")!=None:
- returnself.cache["edges"]
- edge_lst=list()
- seen=set()
- foruinself._adj:
- forvinself._adj[u]:
- if(u,v)notinseen:
- seen.add((u,v))
- seen.add((v,u))
- edge_lst.append((u,v,self._adj[u][v]))
- delseen
- self.cache["edge"]=edge_lst
- returnself.cache["edge"]
-
- @property
- defname(self):
-"""String identifier of the graph.
-
- This graph attribute appears in the attribute dict G.graph
- keyed by the string `"name"`. as well as an attribute (technically
- a property) `G.name`. This is entirely user controlled.
- """
- returnself.graph.get("name","")
-
    @property
    def e_both_side(self, weight="weight") -> Tuple[List[List], List[float]]:
        # Return (edges, weights) with every undirected edge listed in both
        # directions ([u, v] and [v, u]); weights must be float, missing
        # weights default to 1.0 (with a warning). Result is cached.
        # NOTE(review): because this is a property, the ``weight`` parameter
        # can never be supplied by callers — it always uses "weight".
        if self.cache.get("e_both_side") != None:
            return self.cache["e_both_side"]
        edges = list()
        weights = list()
        seen = set()
        for u in self._adj:
            for v in self._adj[u]:
                if (u, v) not in seen:
                    seen.add((u, v))
                    seen.add((v, u))
                    edges.append([u, v])
                    edges.append([v, u])
                    if weight not in self._adj[u][v]:
                        warnings.warn("There is no property %s,default to 1" % (weight))
                        weights.append(1.0)
                        weights.append(1.0)
                    else:
                        if type(self._adj[u][v][weight]) == float:
                            weights.append(self._adj[u][v][weight])
                            weights.append(self._adj[u][v][weight])
                        else:
                            raise EasyGraphException("The type of weight must be float")
        del seen
        self.cache["e_both_side"] = (edges, weights)
        return self.cache["e_both_side"]
-
-
[docs]@staticmethod
- deffrom_hypergraph_hypergcn(
- hypergraph,
- feature,
- with_mediator=False,
- remove_selfloop=True,
- ):
- importtorch
-
-r"""Construct a graph from a hypergraph with methods proposed in `HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs <https://arxiv.org/pdf/1809.02589.pdf>`_ paper .
-
- Args:
- ``hypergraph`` (``Hypergraph``): The source hypergraph.
- ``feature`` (``torch.Tensor``): The feature of the vertices.
- ``with_mediator`` (``str``): Whether to use mediator to transform the hyperedges to edges in the graph. Defaults to ``False``.
- ``remove_selfloop`` (``bool``): Whether to remove self-loop. Defaults to ``True``.
- ``device`` (``torch.device``): The device to store the graph. Defaults to ``torch.device("cpu")``.
- """
- num_v=hypergraph.num_v
- assert(
- num_v==feature.shape[0]
- ),"The number of vertices in hypergraph and feature.shape[0] must be equal!"
- e_list,new_e_list,new_e_weight=hypergraph.e[0],[],[]
- rv=torch.rand((feature.shape[1],1),device=feature.device)
- foreine_list:
- num_v_in_e=len(e)
- assert(
- num_v_in_e>=2
- ),"The number of vertices in an edge must be greater than or equal to 2!"
- p=torch.mm(feature[e,:],rv).squeeze()
- v_a_idx,v_b_idx=torch.argmax(p),torch.argmin(p)
- ifnotwith_mediator:
- new_e_list.append([e[v_a_idx],e[v_b_idx]])
- new_e_weight.append(1.0/num_v_in_e)
- else:
- w=1.0/(2*num_v_in_e-3)
- formid_v_idxinrange(num_v_in_e):
- ifmid_v_idx!=v_a_idxandmid_v_idx!=v_b_idx:
- new_e_list.append([e[v_a_idx],e[mid_v_idx]])
- new_e_weight.append(w)
- new_e_list.append([e[v_b_idx],e[mid_v_idx]])
- new_e_weight.append(w)
- # remove selfloop
- ifremove_selfloop:
- new_e_list=torch.tensor(new_e_list,dtype=torch.long)
- new_e_weight=torch.tensor(new_e_weight,dtype=torch.float)
- e_mask=(new_e_list[:,0]!=new_e_list[:,1]).bool()
- new_e_list=new_e_list[e_mask].numpy().tolist()
- new_e_weight=new_e_weight[e_mask].numpy().tolist()
-
- _g=Graph()
- _g.add_nodes(list(range(0,num_v)))
- for(
- e,
- w,
- )inzip(new_e_list,new_e_weight):
- if_g.has_edge(e[0],e[1]):
- _g.add_edge(e[0],e[1],weight=(w+_g.adj[e[0]][e[1]]["weight"]))
- else:
- _g.add_edge(e[0],e[1],weight=w)
- return_g
    def add_extra_selfloop(self):
        r"""Add extra selfloops to the graph."""
        # NOTE(review): this sets ``_has_extra_selfloop`` while ``__init__``
        # stores the flag as ``self.extra_selfloop`` — confirm which attribute
        # downstream readers actually consult.
        self._has_extra_selfloop = True
        self._clear_cache()

    def remove_extra_selfloop(self):
        r"""Remove extra selfloops from the graph."""
        self._has_extra_selfloop = False
        self._clear_cache()

    def remove_selfloop(self):
        r"""Remove all selfloops from the graph."""
        self._raw_selfloop_dict.clear()
        self.remove_extra_selfloop()
        self._clear_cache()
-
-
    def nbr_v(self, v_idx: int) -> List[int]:
        r"""Return a vertex list of the neighbors of the vertex ``v_idx``.

        Args:
            ``v_idx`` (``int``): The index of the vertex.
        """
        # Materialize the tensor from N_v as a plain Python list.
        return self.N_v(v_idx).cpu().numpy().tolist()

    def N_v(self, v_idx: int):
        r"""Return the neighbors of the vertex ``v_idx`` in ``torch.Tensor`` format.

        Args:
            ``v_idx`` (``int``): The index of the vertex.
        """
        # Neighbor indices come from the sparse adjacency ``self.A``
        # (constructed elsewhere); row ``v_idx`` of its indices.
        sub_v_set = self.A[v_idx]._indices()[0].clone()
        return sub_v_set
[docs]defdegree(self,weight="weight"):
-"""Returns the weighted degree of of each node.
-
- Parameters
- ----------
- weight : string, optional (default: 'weight')
- Weight key of the original weighted graph.
-
- Returns
- -------
- degree : dict
- Each node's (key) weighted degree (value).
-
- Notes
- -----
- If the graph is not weighted, all the weights will be regarded as 1.
-
- Examples
- --------
- You can call with no attributes, if 'weight' is the weight key:
-
- >>> G.degree()
-
- if you have customized weight key 'weight_1'.
-
- >>> G.degree(weight='weight_1')
-
- """
- ifself.cache.get("degree")!=None:
- returnself.cache["degree"]
- degree=dict()
- foru,v,dinself.edges:
- ifuindegree:
- degree[u]+=d.get(weight,1)
- else:
- degree[u]=d.get(weight,1)
- ifvindegree:
- degree[v]+=d.get(weight,1)
- else:
- degree[v]=d.get(weight,1)
-
- # For isolated nodes
- fornodeinself.nodes:
- ifnodenotindegree:
- degree[node]=0
- self.cache["degree"]=degree
- returndegree
-
-
[docs]deforder(self):
-"""Returns the number of nodes in the graph.
-
- Returns
- -------
- nnodes : int
- The number of nodes in the graph.
-
- See Also
- --------
- number_of_nodes: identical method
- __len__: identical method
-
- Examples
- --------
- >>> G = eg.path_graph(3) # or DiGraph, MultiGraph, MultiDiGraph, etc
- >>> G.order()
- 3
- """
- returnlen(self._node)
-
-
[docs]defsize(self,weight=None):
-"""Returns the number of edges or total of all edge weights.
-
- Parameters
- -----------
- weight : String or None, optional
- The weight key. If None, it will calculate the number of
- edges, instead of total of all edge weights.
-
- Returns
- -------
- size : int or float, optional (default: None)
- The number of edges or total of all edge weights.
-
- Examples
- --------
-
- Returns the number of edges in G:
-
- >>> G.size()
-
- Returns the total of all edge weights in G:
-
- >>> G.size(weight='weight')
-
- """
- ifself.cache.get("size")!=None:
- returnself.cache["size"]
- s=sum(dforv,dinself.degree(weight=weight).items())
- self.cache["size"]=s//2ifweightisNoneelses/2
- returnself.cache["size"]
    def smoothing_with_GCN(self, X, drop_rate=0.0):
        r"""Return the smoothed feature matrix with GCN Laplacian matrix :math:`\mathcal{L}_{GCN}`.

        Args:
            ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
            ``drop_rate`` (``float``): Dropout rate. Randomly dropout the connections in adjacency matrix with probability ``drop_rate``. Default: ``0.0``.
        """
        if drop_rate > 0.0:
            # Randomly drop entries of the (sparse) Laplacian during training.
            L_GCN = sparse_dropout(self.L_GCN, drop_rate)
        else:
            L_GCN = self.L_GCN
        # Sparse-dense matmul: L_GCN @ X.
        return L_GCN.mm(X)
-
-
[docs]defnumber_of_edges(self,u=None,v=None):
-"""Returns the number of edges between two nodes.
-
- Parameters
- ----------
- u, v : nodes, optional (default=all edges)
- If u and v are specified, return the number of edges between
- u and v. Otherwise return the total number of all edges.
-
- Returns
- -------
- nedges : int
- The number of edges in the graph. If nodes `u` and `v` are
- specified return the number of edges between those nodes. If
- the graph is directed, this only returns the number of edges
- from `u` to `v`.
-
- See Also
- --------
- size
-
- Examples
- --------
- For undirected graphs, this method counts the total number of
- edges in the graph:
-
- >>> G = eg.path_graph(4)
- >>> G.number_of_edges()
- 3
-
- If you specify two nodes, this counts the total number of edges
- joining the two nodes:
-
- >>> G.number_of_edges(0, 1)
- 1
-
- For directed graphs, this method can count the total number of
- directed edges from `u` to `v`:
-
- >>> G = eg.DiGraph()
- >>> G.add_edge(0, 1)
- >>> G.add_edge(1, 0)
- >>> G.number_of_edges(0, 1)
- 1
-
- """
- ifuisNone:
- returnint(self.size())
- ifvinself._adj[u]:
- return1
- return0
-
-
    def nbunch_iter(self, nbunch=None):
        """Returns an iterator over nodes contained in nbunch that are
        also in the graph.

        Nodes in nbunch that are not in the graph are silently ignored.

        Parameters
        ----------
        nbunch : single node, container, or all nodes (default= all nodes)
            The view will only report edges incident to these nodes.

        Returns
        -------
        niter : iterator
            An iterator over nodes in nbunch that are also in the graph.
            If nbunch is None, iterate over all nodes in the graph.

        Raises
        ------
        EasyGraphError
            If nbunch is not a node or sequence of nodes, or if a node in
            nbunch is not hashable.

        Notes
        -----
        When nbunch is an iterator, the returned iterator yields values
        directly from nbunch, becoming exhausted when nbunch is exhausted.
        """
        if nbunch is None:  # include all nodes via iterator
            bunch = iter(self._adj)
        elif nbunch in self:  # if nbunch is a single node
            bunch = iter([nbunch])
        else:  # if nbunch is a sequence of nodes

            def bunch_iter(nlist, adj):
                try:
                    for n in nlist:
                        if n in adj:
                            yield n
                except TypeError as err:
                    # Inspect the TypeError's message to distinguish the two
                    # failure modes and re-raise a friendlier error.
                    exc, message = err, err.args[0]
                    # capture error for non-sequence/iterator nbunch.
                    if "iter" in message:
                        exc = EasyGraphError(
                            "nbunch is not a node or a sequence of nodes."
                        )
                    # capture error for unhashable node.
                    if "hashable" in message:
                        exc = EasyGraphError(
                            f"Node {n} in sequence nbunch is not a valid node."
                        )
                    raise exc

            bunch = bunch_iter(nbunch, self._adj)
        return bunch
-
-
    def neighbors(self, node):
        """Returns an iterator of a node's neighbors.

        Parameters
        ----------
        node : Hashable
            The target node.

        Returns
        -------
        neighbors : iterator
            An iterator of a node's neighbors.

        Examples
        --------
        >>> G = eg.Graph()
        >>> G.add_edges([(1, 2), (2, 3), (2, 4)])
        >>> for neighbor in G.neighbors(node=2):
        ...     print(neighbor)
        """
        try:
            return iter(self._adj[node])
        except KeyError:
            # NOTE(review): a missing node prints a message and implicitly
            # returns None rather than raising — iterating the result then
            # fails with a TypeError at the call site.
            print("No node {}".format(node))

    # Alias kept for API compatibility.
    all_neighbors = neighbors
-
-
[docs]defadd_node(self,node_for_adding,**node_attr):
-"""Add one node
-
- Add one node, type of which is any hashable Python object, such as int, string, dict, or even Graph itself.
- You can add with node attributes using Python dict type.
-
- Parameters
- ----------
- node_for_adding : any hashable Python object
- Nodes for adding.
-
- node_attr : keywords arguments, optional
- The node attributes.
- You can customize them with different key-value pairs.
-
- See Also
- --------
- add_nodes
-
- Examples
- --------
- >>> G.add_node('a')
- >>> G.add_node('hello world')
- >>> G.add_node('Jack', age=10)
-
- >>> G.add_node('Jack', **{
- ... 'age': 10,
- ... 'gender': 'M'
- ... })
-
- """
- self._add_one_node(node_for_adding,node_attr)
- self._clear_cache()
-
-
[docs]defadd_nodes(self,nodes_for_adding:list,nodes_attr:List[Dict]=[]):
-"""Add nodes with a list of nodes.
-
- Parameters
- ----------
- nodes_for_adding : list
-
- nodes_attr : list of dict
- The corresponding attribute for each of *nodes_for_adding*.
-
- See Also
- --------
- add_node
-
- Examples
- --------
- Add nodes with a list of nodes.
- You can add with node attributes using a list of Python dict type,
- each of which is the attribute of each node, respectively.
-
- >>> G.add_nodes([1, 2, 'a', 'b'])
- >>> G.add_nodes(range(1, 200))
-
- >>> G.add_nodes(['Jack', 'Tom', 'Lily'], nodes_attr=[
- ... {
- ... 'age': 10,
- ... 'gender': 'M'
- ... },
- ... {
- ... 'age': 11,
- ... 'gender': 'M'
- ... },
- ... {
- ... 'age': 10,
- ... 'gender': 'F'
- ... }
- ... ])
-
- """
- ifnotlen(nodes_attr)==0:# Nodes attributes included in input
- assertlen(nodes_for_adding)==len(
- nodes_attr
- ),"Nodes and Attributes lists must have same length."
- else:# Set empty attribute for each node
- nodes_attr=[dict()foriinrange(len(nodes_for_adding))]
-
- foriinrange(len(nodes_for_adding)):
- try:
- self._add_one_node(nodes_for_adding[i],nodes_attr[i])
- exceptExceptionaserr:
- print(err)
- pass
- self._clear_cache()
-
-
[docs]defadd_nodes_from(self,nodes_for_adding,**attr):
-"""Add multiple nodes.
-
- Parameters
- ----------
- nodes_for_adding : iterable container
- A container of nodes (list, dict, set, etc.).
- OR
- A container of (node, attribute dict) tuples.
- Node attributes are updated using the attribute dict.
- attr : keyword arguments, optional (default= no attributes)
- Update attributes for all nodes in nodes.
- Node attributes specified in nodes as a tuple take
- precedence over attributes specified via keyword arguments.
-
- See Also
- --------
- add_node
-
- Examples
- --------
- >>> G = eg.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc
- >>> G.add_nodes_from("Hello")
- >>> K3 = eg.Graph([(0, 1), (1, 2), (2, 0)])
- >>> G.add_nodes_from(K3)
- >>> sorted(G.nodes(), key=str)
- [0, 1, 2, 'H', 'e', 'l', 'o']
-
- Use keywords to update specific node attributes for every node.
-
- >>> G.add_nodes_from([1, 2], size=10)
- >>> G.add_nodes_from([3, 4], weight=0.4)
-
- Use (node, attrdict) tuples to update attributes for specific nodes.
-
- >>> G.add_nodes_from([(1, dict(size=11)), (2, {"color": "blue"})])
- >>> G.nodes[1]["size"]
- 11
- >>> H = eg.Graph()
- >>> H.add_nodes_from(G.nodes(data=True))
- >>> H.nodes[1]["size"]
- 11
-
- """
- forninnodes_for_adding:
- try:
- newnode=nnotinself._node
- newdict=attr
- exceptTypeError:
- n,ndict=n
- newnode=nnotinself._node
- newdict=attr.copy()
- newdict.update(ndict)
- ifnewnode:
- ifnisNone:
- raiseValueError("None cannot be a node")
- self._adj[n]=self.adjlist_inner_dict_factory()
- self._node[n]=self.node_attr_dict_factory()
- self._node[n].update(newdict)
- self._clear_cache()
-
- def_add_one_node(self,one_node_for_adding,node_attr:dict={}):
- node=one_node_for_adding
- ifnodenotinself._node:
- self._node_index[node]=self._id
- self._id+=1
- self._adj[node]=self.adjlist_inner_dict_factory()
- attr_dict=self._node[node]=self.node_attr_dict_factory()
- attr_dict.update(node_attr)
- else:# If already exists, there is no complain and still updating the node attribute
- self._node[node].update(node_attr)
- self._clear_cache()
-
-
[docs]defadd_edge(self,u_of_edge,v_of_edge,**edge_attr):
-"""Add one edge.
-
- Parameters
- ----------
- u_of_edge : object
- One end of this edge
-
- v_of_edge : object
- The other one end of this edge
-
- edge_attr : keywords arguments, optional
- The attribute of the edge.
-
- Notes
- -----
- Nodes of this edge will be automatically added to the graph, if they do not exist.
-
- See Also
- --------
- add_edges
-
- Examples
- --------
-
- >>> G.add_edge(1,2)
- >>> G.add_edge('Jack', 'Tom', weight=10)
-
- Add edge with attributes, edge weight, for example,
-
- >>> G.add_edge(1, 2, **{
- ... 'weight': 20
- ... })
-
- """
- self._add_one_edge(u_of_edge,v_of_edge,edge_attr)
- self._clear_cache()
[docs]defadd_edges(self,edges_for_adding,edges_attr:List[Dict]=[]):
-"""Add a list of edges.
-
- Parameters
- ----------
- edges_for_adding : list of 2-element tuple
- The edges for adding. Each element is a (u, v) tuple, and u, v are
- two ends of the edge.
-
- edges_attr : list of dict, optional
- The corresponding attributes for each edge in *edges_for_adding*.
-
- Examples
- --------
- Add a list of edges into *G*
-
- >>> G.add_edges([
- ... (1, 2),
- ... (3, 4),
- ... ('Jack', 'Tom')
- ... ])
-
- Add edge with attributes, for example, edge weight,
-
- >>> G.add_edges([(1,2), (2, 3)], edges_attr=[
- ... {
- ... 'weight': 20
- ... },
- ... {
- ... 'weight': 15
- ... }
- ... ])
-
- """
- ifedges_attrisNone:
- edges_attr=[]
- ifnotlen(edges_attr)==0:# Edges attributes included in input
- assertlen(edges_for_adding)==len(
- edges_attr
- ),"Edges and Attributes lists must have same length."
- else:# Set empty attribute for each edge
- edges_attr=[dict()foriinrange(len(edges_for_adding))]
-
- foriinrange(len(edges_for_adding)):
- try:
- edge=edges_for_adding[i]
- attr=edges_attr[i]
- assertlen(edge)==2,"Edge tuple {} must be 2-tuple.".format(edge)
- self._add_one_edge(edge[0],edge[1],attr)
- exceptExceptionaserr:
- print(err)
- self._clear_cache()
-
-
[docs]defadd_edges_from(self,ebunch_to_add,**attr):
-"""Add all the edges in ebunch_to_add.
-
- Parameters
- ----------
- ebunch_to_add : container of edges
- Each edge given in the container will be added to the
- graph. The edges must be given as 2-tuples (u, v) or
- 3-tuples (u, v, d) where d is a dictionary containing edge data.
- attr : keyword arguments, optional
- Edge data (or labels or objects) can be assigned using
- keyword arguments.
-
- See Also
- --------
- add_edge : add a single edge
- add_weighted_edges_from : convenient way to add weighted edges
-
- Notes
- -----
- Adding the same edge twice has no effect but any edge data
- will be updated when each duplicate edge is added.
-
- Edge attributes specified in an ebunch take precedence over
- attributes specified via keyword arguments.
-
- Examples
- --------
- >>> G = eg.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc
- >>> G.add_edges_from([(0, 1), (1, 2)]) # using a list of edge tuples
- >>> e = zip(range(0, 3), range(1, 4))
- >>> G.add_edges_from(e) # Add the path graph 0-1-2-3
-
- Associate data to edges
-
- >>> G.add_edges_from([(1, 2), (2, 3)], weight=3)
- >>> G.add_edges_from([(3, 4), (1, 4)], label="WN2898")
- """
- foreinebunch_to_add:
- ne=len(e)
- ifne==3:
- u,v,dd=e
- elifne==2:
- u,v=e
- dd={}# doesn't need edge_attr_dict_factory
- else:
- raiseEasyGraphError(f"Edge tuple {e} must be a 2-tuple or 3-tuple.")
- ifunotinself._node:
- ifuisNone:
- raiseValueError("None cannot be a node")
- self._adj[u]=self.adjlist_inner_dict_factory()
- self._node[u]=self.node_attr_dict_factory()
- ifvnotinself._node:
- ifvisNone:
- raiseValueError("None cannot be a node")
- self._adj[v]=self.adjlist_inner_dict_factory()
- self._node[v]=self.node_attr_dict_factory()
- datadict=self._adj[u].get(v,self.edge_attr_dict_factory())
- datadict.update(attr)
- datadict.update(dd)
- self._adj[u][v]=datadict
- self._adj[v][u]=datadict
- self._clear_cache()
-
    def add_weighted_edges_from(self, ebunch_to_add, weight="weight", **attr):
        """Add weighted edges in `ebunch_to_add` with specified weight attr.

        Parameters
        ----------
        ebunch_to_add : container of edges
            Edges given as 3-tuples (u, v, w) where w is a number.
        weight : string, optional (default= 'weight')
            The attribute name for the edge weights to be added.
        attr : keyword arguments, optional (default= no attributes)
            Edge attributes to add/update for all edges.

        See Also
        --------
        add_edge : add a single edge
        add_edges_from : add multiple edges
        """
        # NOTE(review): an identical definition of this method appears again
        # later in this class; the later one is the definition that actually
        # takes effect at class-creation time.
        self.add_edges_from(((u, v, {weight: d}) for u, v, d in ebunch_to_add), **attr)
-
-
[docs]defadd_weighted_edges_from(self,ebunch_to_add,weight="weight",**attr):
-"""Add weighted edges in `ebunch_to_add` with specified weight attr
-
- Parameters
- ----------
- ebunch_to_add : container of edges
- Each edge given in the list or container will be added
- to the graph. The edges must be given as 3-tuples (u, v, w)
- where w is a number.
- weight : string, optional (default= 'weight')
- The attribute name for the edge weights to be added.
- attr : keyword arguments, optional (default= no attributes)
- Edge attributes to add/update for all edges.
-
- See Also
- --------
- add_edge : add a single edge
- add_edges_from : add multiple edges
-
- Notes
- -----
- Adding the same edge twice for Graph/DiGraph simply updates
- the edge data. For MultiGraph/MultiDiGraph, duplicate edges
- are stored.
-
- Examples
- --------
- >>> G = eg.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc
- >>> G.add_weighted_edges_from([(0, 1, 3.0), (1, 2, 7.5)])
- """
- self.add_edges_from(((u,v,{weight:d})foru,v,dinebunch_to_add),**attr)
-
-
[docs]defadd_edges_from_file(self,file,weighted=False):
-"""Added edges from file
- For example, txt files,
-
- Each line is in form like:
- a b 23.0
- which denotes an edge (a, b) with weight 23.0.
-
- Parameters
- ----------
- file : string
- The file path.
-
- weighted : boolean, optional (default : False)
- If the file consists of weight information, set `True`.
- The weight key will be set as 'weight'.
-
- Examples
- --------
-
- If `./club_network.txt` is:
-
- Jack Mary 23.0
-
- Mary Tom 15.0
-
- Tom Ben 20.0
-
- Then add them to *G*
-
- >>> G.add_edges_from_file(file='./club_network.txt', weighted=True)
-
-
- """
- importre
-
- withopen(file,"r")asfp:
- edges=fp.readlines()
- ifweighted:
- foredgeinedges:
- edge=re.sub(","," ",edge)
- edge=edge.split()
- try:
- self.add_edge(edge[0],edge[1],weight=float(edge[2]))
- except:
- pass
- else:
- foredgeinedges:
- edge=re.sub(","," ",edge)
- edge=edge.split()
- try:
- self.add_edge(edge[0],edge[1])
- except:
- pass
-
-
[docs]defremove_nodes_from(self,nodes):
-"""Remove multiple nodes.
-
- Parameters
- ----------
- nodes : iterable container
- A container of nodes (list, dict, set, etc.). If a node
- in the container is not in the graph it is silently
- ignored.
-
- See Also
- --------
- remove_node
-
- Examples
- --------
- >>> G = eg.path_graph(3) # or DiGraph, MultiGraph, MultiDiGraph, etc
- >>> e = list(G.nodes)
- >>> e
- [0, 1, 2]
- >>> G.remove_nodes_from(e)
- >>> list(G.nodes)
- []
-
- """
- adj=self._adj
- forninnodes:
- try:
- delself._node[n]
- foruinlist(adj[n]):# list handles self-loops
- deladj[u][n]# (allows mutation of dict in loop)
- deladj[n]
- exceptKeyError:
- pass
[docs]defremove_node(self,node_to_remove):
-"""Remove one node from your graph.
-
- Parameters
- ----------
- node_to_remove : object
- The node you want to remove.
-
- See Also
- --------
- remove_nodes
-
- Examples
- --------
- Remove node *Jack* from *G*
-
- >>> G.remove_node('Jack')
-
- """
- try:
- neighbors=list(self._adj[node_to_remove])
- delself._node[node_to_remove]
- exceptKeyError:# Node not exists in self
- raiseEasyGraphError("No node {} in graph.".format(node_to_remove))
- forneighborinneighbors:# Remove edges with other nodes
- delself._adj[neighbor][node_to_remove]
- delself._adj[node_to_remove]# Remove this node
- self._clear_cache()
-
-
[docs]defremove_nodes(self,nodes_to_remove:list):
-"""Remove nodes from your graph.
-
- Parameters
- ----------
- nodes_to_remove : list of object
- The list of nodes you want to remove.
-
- See Also
- --------
- remove_node
-
- Examples
- --------
- Remove node *[1, 2, 'a', 'b']* from *G*
-
- >>> G.remove_nodes([1, 2, 'a', 'b'])
-
- """
- for(
- node
- )in(
- nodes_to_remove
- ):# If not all nodes included in graph, give up removing other nodes
- assertnodeinself._node,"Remove Error: No node {} in graph".format(node)
- fornodeinnodes_to_remove:
- self.remove_node(node)
- self._clear_cache()
-
-
[docs]defremove_edge(self,u,v):
-"""Remove one edge from your graph.
-
- Parameters
- ----------
- u : object
- One end of the edge.
-
- v : object
- The other end of the edge.
-
- See Also
- --------
- remove_edges
-
- Examples
- --------
- Remove edge (1,2) from *G*
-
- >>> G.remove_edge(1,2)
-
- """
- try:
- delself._adj[u][v]
- ifu!=v:# self-loop needs only one entry removed
- delself._adj[v][u]
-
- self._clear_cache()
- exceptKeyError:
- raiseKeyError("No edge {}-{} in graph.".format(u,v))
-
-
[docs]defremove_edges(self,edges_to_remove:[tuple]):
-"""Remove a list of edges from your graph.
-
- Parameters
- ----------
- edges_to_remove : list of tuple
- The list of edges you want to remove,
- Each element is (u, v) tuple, which denote the two ends of the edge.
-
- See Also
- --------
- remove_edge
-
- Examples
- --------
- Remove the edges *('Jack', 'Mary')* amd *('Mary', 'Tom')* from *G*
-
- >>> G.remove_edge([
- ... ('Jack', 'Mary'),
- ... ('Mary', 'Tom')
- ... ])
-
- """
- foredgeinedges_to_remove:
- u,v=edge[:2]
- self.remove_edge(u,v)
- self._clear_cache()
[docs]defnumber_of_nodes(self):
-"""Returns the number of nodes.
-
- Returns
- -------
- number_of_nodes : int
- The number of nodes.
- """
- returnlen(self._node)
[docs]defis_multigraph(self):
-"""Returns True if graph is a multigraph, False otherwise."""
- returnFalse
-
-
[docs]defcopy(self):
-"""Return a deep copy of the graph.
-
- Returns
- -------
- copy : easygraph.Graph
- A deep copy of the original graph.
-
- Examples
- --------
- *G2* is a deep copy of *G1*
-
- >>> G2 = G1.copy()
-
- """
- G=self.__class__()
- G.graph.update(self.graph)
- fornode,node_attrinself._node.items():
- G.add_node(node,**node_attr)
- foru,nbrsinself._adj.items():
- forv,edge_datainnbrs.items():
- G.add_edge(u,v,**edge_data)
-
- returnG
-
-
[docs]defnodes_subgraph(self,from_nodes:list):
-"""Returns a subgraph of some nodes
-
- Parameters
- ----------
- from_nodes : list of object
- The nodes in subgraph.
-
- Returns
- -------
- nodes_subgraph : easygraph.Graph
- The subgraph consisting of *from_nodes*.
-
- Examples
- --------
-
- >>> G = eg.Graph()
- >>> G.add_edges([(1,2), (2,3), (2,4), (4,5)])
- >>> G_sub = G.nodes_subgraph(from_nodes= [1,2,3])
-
- """
- G=self.__class__()
- G.graph.update(self.graph)
- from_nodes=set(from_nodes)
- fornodeinfrom_nodes:
- try:
- G.add_node(node,**self._node[node])
- exceptKeyError:
- pass
-
- forv,edge_datainself._adj[node].items():
- ifvinfrom_nodes:
- G.add_edge(node,v,**edge_data)
- returnG
-
-
[docs]defego_subgraph(self,center):
-"""Returns an ego network graph of a node.
-
- Parameters
- ----------
- center : object
- The center node of the ego network graph
-
- Returns
- -------
- ego_subgraph : easygraph.Graph
- The ego network graph of *center*.
-
-
- Examples
- --------
- >>> G = eg.Graph()
- >>> G.add_edges([
- ... ('Jack', 'Maria'),
- ... ('Maria', 'Andy'),
- ... ('Jack', 'Tom')
- ... ])
- >>> G.ego_subgraph(center='Jack')
- """
- neighbors_of_center=list(self.all_neighbors(center))
- neighbors_of_center.append(center)
- returnself.nodes_subgraph(from_nodes=neighbors_of_center)
-
-
[docs]defto_index_node_graph(self,begin_index=0):
-"""Returns a deep copy of graph, with each node switched to its index.
-
- Considering that the nodes of your graph may be any possible hashable Python object,
- you can get an isomorphic graph of the original one, with each node switched to its index.
-
- Parameters
- ----------
- begin_index : int
- The begin index of the index graph.
-
- Returns
- -------
- G : easygraph.Graph
- Deep copy of graph, with each node switched to its index.
-
- index_of_node : dict
- Index of node
-
- node_of_index : dict
- Node of index
-
- Examples
- --------
- The following method returns this isomorphic graph and index-to-node dictionary
- as well as node-to-index dictionary.
-
- >>> G = eg.Graph()
- >>> G.add_edges([
- ... ('Jack', 'Maria'),
- ... ('Maria', 'Andy'),
- ... ('Jack', 'Tom')
- ... ])
- >>> G_index_graph, index_of_node, node_of_index = G.to_index_node_graph()
-
- """
- G=self.__class__()
- G.graph.update(self.graph)
- index_of_node=dict()
- node_of_index=dict()
- forindex,(node,node_attr)inenumerate(self._node.items()):
- G.add_node(index+begin_index,**node_attr)
- index_of_node[node]=index+begin_index
- node_of_index[index+begin_index]=node
- foru,nbrsinself._adj.items():
- forv,edge_datainnbrs.items():
- G.add_edge(index_of_node[u],index_of_node[v],**edge_data)
-
- returnG,index_of_node,node_of_index
-
-
[docs]defto_directed_class(self):
-"""Returns the class to use for empty directed copies.
-
- If you subclass the base classes, use this to designate
- what directed class to use for `to_directed()` copies.
- """
- returneg.DiGraph
-
-
[docs]defto_directed(self):
-"""Returns a directed representation of the graph.
-
- Returns
- -------
- G : DiGraph
- A directed graph with the same name, same nodes, and with
- each edge (u, v, data) replaced by two directed edges
- (u, v, data) and (v, u, data).
-
- Notes
- -----
- This returns a "deepcopy" of the edge, node, and
- graph attributes which attempts to completely copy
- all of the data and references.
-
- This is in contrast to the similar D=DiGraph(G) which returns a
- shallow copy of the data.
-
- See the Python copy module for more information on shallow
- and deep copies, https://docs.python.org/3/library/copy.html.
-
- Warning: If you have subclassed Graph to use dict-like objects
- in the data structure, those changes do not transfer to the
- DiGraph created by this method.
-
- Examples
- --------
- >>> G = eg.Graph() # or MultiGraph, etc
- >>> G.add_edge(0, 1)
- >>> H = G.to_directed()
- >>> list(H.edges)
- [(0, 1), (1, 0)]
-
- If already directed, return a (deep) copy
-
- >>> G = eg.DiGraph() # or MultiDiGraph, etc
- >>> G.add_edge(0, 1)
- >>> H = G.to_directed()
- >>> list(H.edges)
- [(0, 1)]
- """
- graph_class=self.to_directed_class()
-
- G=graph_class()
- G.graph.update(deepcopy(self.graph))
- G.add_nodes_from((n,deepcopy(d))forn,dinself._node.items())
- G.add_edges_from(
- (u,v,deepcopy(data))
- foru,nbrsinself._adj.items()
- forv,datainnbrs.items()
- )
- returnG
-
- def_clear_cache(self):
-r"""Clear the cache."""
- self.cache={}
-
-
[docs]classHypergraph(BaseHypergraph):
-r"""The ``Hypergraph`` class is developed for hypergraph structures.
-
- Args:
- ``num_v`` (``int``): The number of vertices in the hypergraph.
- ``e_list`` (``Union[List[int], List[List[int]]]``, optional): A list of hyperedges describes how the vertices point to the hyperedges. Defaults to ``None``.
- ``e_weight`` (``Union[float, List[float]]``, optional): A list of weights for hyperedges. If set to ``None``, the value ``1`` is used for all hyperedges. Defaults to ``None``.
- ``merge_op`` (``str``): The operation to merge those conflicting hyperedges in the same hyperedge group, which can be ``'mean'``, ``'sum'`` or ``'max'``. Defaults to ``'mean'``.
- ``device`` (``torch.device``, optional): The device to store the hypergraph. Defaults to ``torch.device('cpu')``.
- """
-
- def__init__(
- self,
- num_v:int,
- e_list:Optional[Union[List[int],List[List[int]]]]=None,
- e_weight:Optional[Union[float,List[float]]]=None,
- merge_op:str="mean",
- device:torch.device=torch.device("cpu"),
- ):
- super().__init__(num_v,device=device)
- ife_listisnotNone:
- self.add_hyperedges(e_list,e_weight,merge_op=merge_op)
-
- def__repr__(self)->str:
-r"""Print the hypergraph information."""
- returnf"Hypergraph(num_vertex={self.num_v}, num_hyperedge={self.num_e})"
-
- @property
- defstate_dict(self)->Dict[str,Any]:
-r"""Get the state dict of the hypergraph."""
- return{"num_v":self.num_v,"raw_groups":self._raw_groups}
-
-
[docs]defsave(self,file_path:Union[str,Path]):
-r"""Save the DHG's hypergraph structure a file.
-
- Args:
- ``file_path`` (``Union[str, Path]``): The file path to store the DHG's hypergraph structure.
- """
- file_path=Path(file_path)
- assertfile_path.parent.exists(),"The directory does not exist."
- data={
- "class":"Hypergraph",
- "state_dict":self.state_dict,
- }
- withopen(file_path,"wb")asfp:
- pickle.dump(data,fp)
-
-
[docs]@staticmethod
- defload(file_path:Union[str,Path]):
-r"""Load the DHG's hypergraph structure from a file.
-
- Args:
- ``file_path`` (``Union[str, Path]``): The file path to load the DHG's hypergraph structure.
- """
- file_path=Path(file_path)
- assertfile_path.exists(),"The file does not exist."
- withopen(file_path,"rb")asfp:
- data=pickle.load(fp)
- assertdata["class"]=="Hypergraph","The file is not a DHG's hypergraph file."
- returnHypergraph.from_state_dict(data["state_dict"])
-
-
[docs]defdraw(
- self,
- e_style:str="circle",
- v_label:Optional[List[str]]=None,
- v_size:Union[float,list]=1.0,
- v_color:Union[str,list]="r",
- v_line_width:Union[str,list]=1.0,
- e_color:Union[str,list]="gray",
- e_fill_color:Union[str,list]="whitesmoke",
- e_line_width:Union[str,list]=1.0,
- font_size:float=1.0,
- font_family:str="sans-serif",
- push_v_strength:float=1.0,
- push_e_strength:float=1.0,
- pull_e_strength:float=1.0,
- pull_center_strength:float=1.0,
- ):
-r"""Draw the hypergraph structure.
-
- Args:
- ``e_style`` (``str``): The style of hyperedges. The available styles are only ``'circle'``. Defaults to ``'circle'``.
- ``v_label`` (``list``): The labels of vertices. Defaults to ``None``.
- ``v_size`` (``float`` or ``list``): The size of vertices. Defaults to ``1.0``.
- ``v_color`` (``str`` or ``list``): The `color <https://matplotlib.org/stable/gallery/color/named_colors.html>`_ of vertices. Defaults to ``'r'``.
- ``v_line_width`` (``float`` or ``list``): The line width of vertices. Defaults to ``1.0``.
- ``e_color`` (``str`` or ``list``): The `color <https://matplotlib.org/stable/gallery/color/named_colors.html>`_ of hyperedges. Defaults to ``'gray'``.
- ``e_fill_color`` (``str`` or ``list``): The fill `color <https://matplotlib.org/stable/gallery/color/named_colors.html>`_ of hyperedges. Defaults to ``'whitesmoke'``.
- ``e_line_width`` (``float`` or ``list``): The line width of hyperedges. Defaults to ``1.0``.
- ``font_size`` (``float``): The font size of labels. Defaults to ``1.0``.
- ``font_family`` (``str``): The font family of labels. Defaults to ``'sans-serif'``.
- ``push_v_strength`` (``float``): The strength of pushing vertices. Defaults to ``1.0``.
- ``push_e_strength`` (``float``): The strength of pushing hyperedges. Defaults to ``1.0``.
- ``pull_e_strength`` (``float``): The strength of pulling hyperedges. Defaults to ``1.0``.
- ``pull_center_strength`` (``float``): The strength of pulling vertices to the center. Defaults to ``1.0``.
- """
- draw_hypergraph(
- self,
- e_style,
- v_label,
- v_size,
- v_color,
- v_line_width,
- e_color,
- e_fill_color,
- e_line_width,
- font_size,
- font_family,
- push_v_strength,
- push_e_strength,
- pull_e_strength,
- pull_center_strength,
- )
-
-
[docs]defclear(self):
-r"""Clear all hyperedges and caches from the hypergraph."""
- returnsuper().clear()
-
-
[docs]defclone(self)->"Hypergraph":
-r"""Return a copy of the hypergraph."""
- hg=Hypergraph(self.num_v,device=self.device)
- hg._raw_groups=deepcopy(self._raw_groups)
- hg.cache=deepcopy(self.cache)
- hg.group_cache=deepcopy(self.group_cache)
- returnhg
-
-
[docs]defto(self,device:torch.device):
-r"""Move the hypergraph to the specified device.
-
- Args:
- ``device`` (``torch.device``): The target device.
- """
- returnsuper().to(device)
-
- # =====================================================================================
- # some construction functions
-
[docs]@staticmethod
- deffrom_state_dict(state_dict:dict):
-r"""Load the hypergraph from the state dict.
-
- Args:
- ``state_dict`` (``dict``): The state dict to load the hypergraph.
- """
- _hg=Hypergraph(state_dict["num_v"])
- _hg._raw_groups=deepcopy(state_dict["raw_groups"])
- return_hg
-
- @staticmethod
- def_e_list_from_feature_kNN(features:torch.Tensor,k:int):
- importscipy
-
-r"""Construct hyperedges from the feature matrix. Each hyperedge in the hypergraph is constructed by the central vertex ans its :math:`k-1` neighbor vertices.
-
- Args:
- ``features`` (``torch.Tensor``): The feature matrix.
- ``k`` (``int``): The number of nearest neighbors.
- """
- features=features.cpu().numpy()
- assertfeatures.ndim==2,"The feature matrix should be 2-D."
- assertk<=features.shape[0],(
- "The number of nearest neighbors should be less than or equal to the number"
- " of vertices."
- )
- tree=scipy.spatial.cKDTree(features)
- _,nbr_array=tree.query(features,k=k)
- returnnbr_array.tolist()
-
-
[docs]@staticmethod
- deffrom_feature_kNN(
- features:torch.Tensor,k:int,device:torch.device=torch.device("cpu")
- ):
-r"""Construct the hypergraph from the feature matrix. Each hyperedge in the hypergraph is constructed by the central vertex ans its :math:`k-1` neighbor vertices.
-
- .. note::
- The constructed hypergraph is a k-uniform hypergraph. If the feature matrix has the size :math:`N \times C`, the number of vertices and hyperedges of the constructed hypergraph are both :math:`N`.
-
- Args:
- ``features`` (``torch.Tensor``): The feature matrix.
- ``k`` (``int``): The number of nearest neighbors.
- ``device`` (``torch.device``, optional): The device to store the hypergraph. Defaults to ``torch.device('cpu')``.
- """
- e_list=Hypergraph._e_list_from_feature_kNN(features,k)
- hg=Hypergraph(features.shape[0],e_list,device=device)
- returnhg
-
-
[docs]@staticmethod
- deffrom_graph(graph,device:torch.device=torch.device("cpu"))->"Hypergraph":
-r"""Construct the hypergraph from the graph. Each edge in the graph is treated as a hyperedge in the constructed hypergraph.
-
- .. note::
- The construsted hypergraph is a 2-uniform hypergraph, and has the same number of vertices and edges/hyperedges as the graph.
-
- Args:
- ``graph`` (``eg.Graph``): The graph to construct the hypergraph.
- ``device`` (``torch.device``, optional): The device to store the hypergraph. Defaults to ``torch.device('cpu')``.
- """
- e_list,e_weight=graph.e
- hg=Hypergraph(len(graph.nodes),e_list,e_weight=e_weight,device=device)
- returnhg
-
- @staticmethod
- def_e_list_from_graph_kHop(
- graph,
- k:int,
- only_kHop:bool=False,
- )->List[tuple]:
-r"""Construct the hyperedge list from the graph by k-Hop neighbors. Each hyperedge in the hypergraph is constructed by the central vertex and its :math:`k`-Hop neighbor vertices.
-
- .. note::
- If the graph have :math:`|\mathcal{V}|` vertices, the constructed hypergraph will have :math:`|\mathcal{V}|` vertices and equal to or less than :math:`|\mathcal{V}|` hyperedges.
-
- Args:
- ``graph`` (``eg.Graph``): The graph to construct the hypergraph.
- ``k`` (``int``): The number of hop neighbors.
- ``only_kHop`` (``bool``, optional): If set to ``True``, only the central vertex and its :math:`k`-th Hop neighbors are used to construct the hyperedges. By default, the constructed hyperedge will include the central vertex and its [ :math:`1`-th, :math:`2`-th, :math:`\cdots`, :math:`k`-th ] Hop neighbors. Defaults to ``False``.
- """
- assert(
- k>=1
- ),"The number of hop neighbors should be larger than or equal to 1."
- A_1,A_k=graph.A.clone(),graph.A.clone()
- A_history=[]
- for_inrange(k-1):
- A_k=torch.sparse.mm(A_k,A_1)
- ifnotonly_kHop:
- A_history.append(A_k.clone())
- ifnotonly_kHop:
- A_k=A_1
- forA_inA_history:
- A_k=A_k+A_
- e_list=[
- tuple(set([v_idx]+A_k[v_idx]._indices().cpu().squeeze(0).tolist()))
- forv_idxinrange(len(graph.nodes))
- ]
- returne_list
-
-
[docs]@staticmethod
- deffrom_graph_kHop(
- graph,
- k:int,
- only_kHop:bool=False,
- device:torch.device=torch.device("cpu"),
- )->"Hypergraph":
-r"""Construct the hypergraph from the graph by k-Hop neighbors. Each hyperedge in the hypergraph is constructed by the central vertex and its :math:`k`-Hop neighbor vertices.
-
- .. note::
- If the graph have :math:`|\mathcal{V}|` vertices, the constructed hypergraph will have :math:`|\mathcal{V}|` vertices and equal to or less than :math:`|\mathcal{V}|` hyperedges.
-
- Args:
- ``graph`` (``eg.Graph``): The graph to construct the hypergraph.
- ``k`` (``int``): The number of hop neighbors.
- ``only_kHop`` (``bool``): If set to ``True``, only the central vertex and its :math:`k`-th Hop neighbors are used to construct the hyperedges. By default, the constructed hyperedge will include the central vertex and its [ :math:`1`-th, :math:`2`-th, :math:`\cdots`, :math:`k`-th ] Hop neighbors. Defaults to ``False``.
- ``device`` (``torch.device``, optional): The device to store the hypergraph. Defaults to ``torch.device('cpu')``.
- """
- e_list=Hypergraph._e_list_from_graph_kHop(graph,k,only_kHop)
- hg=Hypergraph(len(graph.nodes),e_list,device=device)
- returnhg
-
-
[docs]defadd_hyperedges(
- self,
- e_list:Union[List[int],List[List[int]]],
- e_weight:Optional[Union[float,List[float]]]=None,
- merge_op:str="mean",
- group_name:str="main",
- ):
-r"""Add hyperedges to the hypergraph. If the ``group_name`` is not specified, the hyperedges will be added to the default ``main`` hyperedge group.
-
- Args:
- ``num_v`` (``int``): The number of vertices in the hypergraph.
- ``e_list`` (``Union[List[int], List[List[int]]]``): A list of hyperedges describes how the vertices point to the hyperedges.
- ``e_weight`` (``Union[float, List[float]]``, optional): A list of weights for hyperedges. If set to ``None``, the value ``1`` is used for all hyperedges. Defaults to ``None``.
- ``merge_op`` (``str``): The merge operation for the conflicting hyperedges. The possible values are ``"mean"``, ``"sum"``, and ``"max"``. Defaults to ``"mean"``.
- ``group_name`` (``str``, optional): The target hyperedge group to add these hyperedges. Defaults to the ``main`` hyperedge group.
- """
- e_list=self._format_e_list(e_list)
- ife_weightisNone:
- e_weight=[1.0]*len(e_list)
- eliftype(e_weight)in(int,float):
- e_weight=[e_weight]
- eliftype(e_weight)islist:
- pass
- else:
- raiseTypeError(
- "The type of e_weight should be float or list, but got"
- f" {type(e_weight)}"
- )
- assertlen(e_list)==len(
- e_weight
- ),"The number of hyperedges and the number of weights are not equal."
-
- for_idxinrange(len(e_list)):
- self._add_hyperedge(
- self._hyperedge_code(e_list[_idx],e_list[_idx]),
- {"w_e":float(e_weight[_idx])},
- merge_op,
- group_name,
- )
- self._clear_cache(group_name)
-
-
[docs]defadd_hyperedges_from_feature_kNN(
- self,feature:torch.Tensor,k:int,group_name:str="main"
- ):
-r"""Add hyperedges from the feature matrix by k-NN. Each hyperedge is constructed by the central vertex and its :math:`k`-Nearest Neighbor vertices.
-
- Args:
- ``features`` (``torch.Tensor``): The feature matrix.
- ``k`` (``int``): The number of nearest neighbors.
- ``group_name`` (``str``, optional): The target hyperedge group to add these hyperedges. Defaults to the ``main`` hyperedge group.
- """
- assertfeature.shape[0]==self.num_v,(
- "The number of vertices in the feature matrix is not equal to the number of"
- " vertices in the hypergraph."
- )
- e_list=Hypergraph._e_list_from_feature_kNN(feature,k)
- self.add_hyperedges(e_list,group_name=group_name)
-
-
[docs]defadd_hyperedges_from_graph(self,graph,group_name:str="main"):
-r"""Add hyperedges from edges in the graph. Each edge in the graph is treated as a hyperedge.
-
- Args:
- ``graph`` (``eg.Graph``): The graph to join the hypergraph.
- ``group_name`` (``str``, optional): The target hyperedge group to add these hyperedges. Defaults to the ``main`` hyperedge group.
- """
- assertself.num_v==len(
- graph.nodes
- ),"The number of vertices in the hypergraph and the graph are not equal."
- e_list,e_weight=graph.e_both_side
- self.add_hyperedges(e_list,e_weight=e_weight,group_name=group_name)
-
-
[docs]defadd_hyperedges_from_graph_kHop(
- self,graph,k:int,only_kHop:bool=False,group_name:str="main"
- ):
-r"""Add hyperedges from vertices and its k-Hop neighbors in the graph. Each hyperedge in the hypergraph is constructed by the central vertex and its :math:`k`-Hop neighbor vertices.
-
- .. note::
- If the graph have :math:`|\mathcal{V}|` vertices, the constructed hypergraph will have :math:`|\mathcal{V}|` vertices and equal to or less than :math:`|\mathcal{V}|` hyperedges.
-
- Args:
- ``graph`` (``eg.Graph``): The graph to join the hypergraph.
- ``k`` (``int``): The number of hop neighbors.
- ``only_kHop`` (``bool``): If set to ``True``, only the central vertex and its :math:`k`-th Hop neighbors are used to construct the hyperedges. By default, the constructed hyperedge will include the central vertex and its [ :math:`1`-th, :math:`2`-th, :math:`\cdots`, :math:`k`-th ] Hop neighbors. Defaults to ``False``.
- ``group_name`` (``str``, optional): The target hyperedge group to add these hyperedges. Defaults to the ``main`` hyperedge group.
- """
- assertself.num_v==len(
- graph.nodes
- ),"The number of vertices in the hypergraph and the graph are not equal."
- e_list=Hypergraph._e_list_from_graph_kHop(graph,k,only_kHop=only_kHop)
- self.add_hyperedges(e_list,group_name=group_name)
-
-
[docs]defremove_hyperedges(
- self,
- e_list:Union[List[int],List[List[int]]],
- group_name:Optional[str]=None,
- ):
-r"""Remove the specified hyperedges from the hypergraph.
-
- Args:
- ``e_list`` (``Union[List[int], List[List[int]]]``): A list of hyperedges describes how the vertices point to the hyperedges.
- ``group_name`` (``str``, optional): Remove these hyperedges from the specified hyperedge group. If not specified, the function will
- remove those hyperedges from all hyperedge groups. Defaults to the ``None``.
- """
- assert(
- group_nameisNoneorgroup_nameinself.group_names
- ),"The specified group_name is not in existing hyperedge groups."
- e_list=self._format_e_list(e_list)
- ifgroup_nameisNone:
- for_idxinrange(len(e_list)):
- e_code=self._hyperedge_code(e_list[_idx],e_list[_idx])
- fornameinself.group_names:
- self._raw_groups[name].pop(e_code,None)
- else:
- for_idxinrange(len(e_list)):
- e_code=self._hyperedge_code(e_list[_idx],e_list[_idx])
- self._raw_groups[group_name].pop(e_code,None)
- self._clear_cache(group_name)
-
-
[docs]defremove_group(self,group_name:str):
-r"""Remove the specified hyperedge group from the hypergraph.
-
- Args:
- ``group_name`` (``str``): The name of the hyperedge group to remove.
- """
- self._raw_groups.pop(group_name,None)
- self._clear_cache(group_name)
-
-
[docs]defdrop_hyperedges(self,drop_rate:float,ord="uniform"):
-r"""Randomly drop hyperedges from the hypergraph. This function will return a new hypergraph with non-dropped hyperedges.
-
- Args:
- ``drop_rate`` (``float``): The drop rate of hyperedges.
- ``ord`` (``str``): The order of dropping edges. Currently, only ``'uniform'`` is supported. Defaults to ``uniform``.
- """
- iford=="uniform":
- _raw_groups={}
- fornameinself.group_names:
- _raw_groups[name]={
- k:v
- fork,vinself._raw_groups[name].items()
- ifrandom.random()>drop_rate
- }
- state_dict={
- "num_v":self.num_v,
- "raw_groups":_raw_groups,
- }
- _hg=Hypergraph.from_state_dict(state_dict)
- _hg=_hg.to(self.device)
- else:
- raiseValueError(f"Unknown drop order: {ord}.")
- return_hg
-
-
[docs]defdrop_hyperedges_of_group(
- self,group_name:str,drop_rate:float,ord="uniform"
- ):
-r"""Randomly drop hyperedges from the specified hyperedge group. This function will return a new hypergraph with non-dropped hyperedges.
-
- Args:
- ``group_name`` (``str``): The name of the hyperedge group.
- ``drop_rate`` (``float``): The drop rate of hyperedges.
- ``ord`` (``str``): The order of dropping edges. Currently, only ``'uniform'`` is supported. Defaults to ``uniform``.
- """
- iford=="uniform":
- _raw_groups={}
- fornameinself.group_names:
- ifname==group_name:
- _raw_groups[name]={
- k:v
- fork,vinself._raw_groups[name].items()
- ifrandom.random()>drop_rate
- }
- else:
- _raw_groups[name]=self._raw_groups[name]
- state_dict={
- "num_v":self.num_v,
- "raw_groups":_raw_groups,
- }
- _hg=Hypergraph.from_state_dict(state_dict)
- _hg=_hg.to(self.device)
- else:
- raiseValueError(f"Unknown drop order: {ord}.")
- return_hg
-
- # =====================================================================================
- # properties for representation
- @property
- defv(self)->List[int]:
-r"""Return the list of vertices."""
- returnsuper().v
-
- @property
- defe(self)->Tuple[List[List[int]],List[float]]:
-r"""Return all hyperedges and weights in the hypergraph."""
- ifself.cache.get("e",None)isNone:
- e_list,e_weight=[],[]
- fornameinself.group_names:
- _e=self.e_of_group(name)
- e_list.extend(_e[0])
- e_weight.extend(_e[1])
- self.cache["e"]=(e_list,e_weight)
- returnself.cache["e"]
-
-
[docs]defe_of_group(self,group_name:str)->Tuple[List[List[int]],List[float]]:
-r"""Return all hyperedges and weights of the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.group_cache[group_name].get("e",None)isNone:
- e_list=[e_code[0]fore_codeinself._raw_groups[group_name].keys()]
- e_weight=[
- e_content["w_e"]fore_contentinself._raw_groups[group_name].values()
- ]
- self.group_cache[group_name]["e"]=(e_list,e_weight)
- returnself.group_cache[group_name]["e"]
-
- @property
- defnum_v(self)->int:
-r"""Return the number of vertices in the hypergraph."""
- returnsuper().num_v
-
- @property
- defnum_e(self)->int:
-r"""Return the number of hyperedges in the hypergraph."""
- returnsuper().num_e
-
-
[docs]defnum_e_of_group(self,group_name:str)->int:
-r"""Return the number of hyperedges of the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- returnsuper().num_e_of_group(group_name)
-
- @property
- defdeg_v(self)->List[int]:
-r"""Return the degree list of each vertex."""
- returnself.D_v._values().cpu().view(-1).numpy().tolist()
-
-
[docs]defdeg_v_of_group(self,group_name:str)->List[int]:
-r"""Return the degree list of each vertex of the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- returnself.D_v_of_group(group_name)._values().cpu().view(-1).numpy().tolist()
-
- @property
- defdeg_e(self)->List[int]:
-r"""Return the degree list of each hyperedge."""
- returnself.D_e._values().cpu().view(-1).numpy().tolist()
-
-
[docs]defdeg_e_of_group(self,group_name:str)->List[int]:
-r"""Return the degree list of each hyperedge of the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- returnself.D_e_of_group(group_name)._values().cpu().view(-1).numpy().tolist()
-
-
[docs]defnbr_e(self,v_idx:int)->List[int]:
-r"""Return the neighbor hyperedge list of the specified vertex.
-
- Args:
- ``v_idx`` (``int``): The index of the vertex.
- """
- returnself.N_e(v_idx).cpu().numpy().tolist()
-
-
[docs]defnbr_e_of_group(self,v_idx:int,group_name:str)->List[int]:
-r"""Return the neighbor hyperedge list of the specified vertex of the specified hyperedge group.
-
- Args:
- ``v_idx`` (``int``): The index of the vertex.
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- returnself.N_e_of_group(v_idx,group_name).cpu().numpy().tolist()
-
-
[docs]defnbr_v(self,e_idx:int)->List[int]:
-r"""Return the neighbor vertex list of the specified hyperedge.
-
- Args:
- ``e_idx`` (``int``): The index of the hyperedge.
- """
- returnself.N_v(e_idx).cpu().numpy().tolist()
-
-
[docs]defnbr_v_of_group(self,e_idx:int,group_name:str)->List[int]:
-r"""Return the neighbor vertex list of the specified hyperedge of the specified hyperedge group.
-
- Args:
- ``e_idx`` (``int``): The index of the hyperedge.
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- returnself.N_v_of_group(e_idx,group_name).cpu().numpy().tolist()
-
- @property
- defnum_groups(self)->int:
-r"""Return the number of hyperedge groups in the hypergraph."""
- returnsuper().num_groups
-
- @property
- defgroup_names(self)->List[str]:
-r"""Return the names of all hyperedge groups in the hypergraph."""
- returnsuper().group_names
-
- # =====================================================================================
- # properties for deep learning
- @property
- defvars_for_DL(self)->List[str]:
-r"""Return a name list of available variables for deep learning in the hypergraph including
-
- Sparse Matrices:
-
- .. math::
- \mathbf{H}, \mathbf{H}^\top, \mathcal{L}_{sym}, \mathcal{L}_{rw} \mathcal{L}_{HGNN},
-
- Sparse Diagnal Matrices:
-
- .. math::
- \mathbf{W}_e, \mathbf{D}_v, \mathbf{D}_v^{-1}, \mathbf{D}_v^{-\frac{1}{2}}, \mathbf{D}_e, \mathbf{D}_e^{-1},
-
- Vectors:
-
- .. math::
- \overrightarrow{v2e}_{src}, \overrightarrow{v2e}_{dst}, \overrightarrow{v2e}_{weight},\\
- \overrightarrow{e2v}_{src}, \overrightarrow{e2v}_{dst}, \overrightarrow{e2v}_{weight}
-
- """
- return[
- "H",
- "H_T",
- "L_sym",
- "L_rw",
- "L_HGNN",
- "W_e",
- "D_v",
- "D_v_neg_1",
- "D_v_neg_1_2",
- "D_e",
- "D_e_neg_1",
- "v2e_src",
- "v2e_dst",
- "v2e_weighte2v_src",
- "e2v_dst",
- "e2v_weight",
- ]
-
- @property
- defv2e_src(self)->torch.Tensor:
-r"""Return the source vertex index vector :math:`\overrightarrow{v2e}_{src}` of the connections (vertices point to hyperedges) in the hypergraph.
- """
- returnself.H_T._indices()[1].clone()
-
-
[docs]defv2e_src_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the source vertex index vector :math:`\overrightarrow{v2e}_{src}` of the connections (vertices point to hyperedges) in the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- returnself.H_T_of_group(group_name)._indices()[1].clone()
-
- @property
- defv2e_dst(self)->torch.Tensor:
-r"""Return the destination hyperedge index vector :math:`\overrightarrow{v2e}_{dst}` of the connections (vertices point to hyperedges) in the hypergraph.
- """
- returnself.H_T._indices()[0].clone()
-
-
[docs]defv2e_dst_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the destination hyperedge index vector :math:`\overrightarrow{v2e}_{dst}` of the connections (vertices point to hyperedges) in the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- returnself.H_T_of_group(group_name)._indices()[0].clone()
-
- @property
- defv2e_weight(self)->torch.Tensor:
-r"""Return the weight vector :math:`\overrightarrow{v2e}_{weight}` of the connections (vertices point to hyperedges) in the hypergraph.
- """
- returnself.H_T._values().clone()
-
-
[docs]defv2e_weight_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the weight vector :math:`\overrightarrow{v2e}_{weight}` of the connections (vertices point to hyperedges) in the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- returnself.H_T_of_group(group_name)._values().clone()
-
- @property
- defe2v_src(self)->torch.Tensor:
-r"""Return the source hyperedge index vector :math:`\overrightarrow{e2v}_{src}` of the connections (hyperedges point to vertices) in the hypergraph.
- """
- returnself.H._indices()[1].clone()
-
-
[docs]defe2v_src_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the source hyperedge index vector :math:`\overrightarrow{e2v}_{src}` of the connections (hyperedges point to vertices) in the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- returnself.H_of_group(group_name)._indices()[1].clone()
-
- @property
- defe2v_dst(self)->torch.Tensor:
-r"""Return the destination vertex index vector :math:`\overrightarrow{e2v}_{dst}` of the connections (hyperedges point to vertices) in the hypergraph.
- """
- returnself.H._indices()[0].clone()
-
-
[docs]defe2v_dst_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the destination vertex index vector :math:`\overrightarrow{e2v}_{dst}` of the connections (hyperedges point to vertices) in the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- returnself.H_of_group(group_name)._indices()[0].clone()
-
- @property
- defe2v_weight(self)->torch.Tensor:
-r"""Return the weight vector :math:`\overrightarrow{e2v}_{weight}` of the connections (hyperedges point to vertices) in the hypergraph.
- """
- returnself.H._values().clone()
-
-
[docs]defe2v_weight_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the weight vector :math:`\overrightarrow{e2v}_{weight}` of the connections (hyperedges point to vertices) in the specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- returnself.H_of_group(group_name)._values().clone()
[docs]defH_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the hypergraph incidence matrix :math:`\mathbf{H}` of the specified hyperedge group with ``torch.Tensor`` format.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.group_cache[group_name].get("H")isNone:
- self.group_cache[group_name]["H"]=self.H_v2e_of_group(group_name)
- returnself.group_cache[group_name]["H"]
-
- @property
- defH_T(self)->torch.Tensor:
-r"""Return the transpose of the hypergraph incidence matrix :math:`\mathbf{H}^\top` with ``torch.Tensor`` format.
- """
- ifself.cache.get("H_T")isNone:
- self.cache["H_T"]=self.H.t()
- returnself.cache["H_T"]
-
-
[docs]defH_T_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the transpose of the hypergraph incidence matrix :math:`\mathbf{H}^\top` of the specified hyperedge group with ``torch.Tensor`` format.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.group_cache[group_name].get("H_T")isNone:
- self.group_cache[group_name]["H_T"]=self.H_of_group(group_name).t()
- returnself.group_cache[group_name]["H_T"]
[docs]defW_e_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the weight matrix :math:`\mathbf{W}_e` of hyperedges of the specified hyperedge group with ``torch.Tensor`` format.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.group_cache[group_name].get("W_e")isNone:
- _tmp=self._fetch_W_of_group(group_name).view(-1)
- _num_e=_tmp.size(0)
- self.group_cache[group_name]["W_e"]=torch.sparse_coo_tensor(
- torch.arange(0,_num_e).view(1,-1).repeat(2,1),
- _tmp,
- torch.Size([_num_e,_num_e]),
- device=self.device,
- ).coalesce()
- returnself.group_cache[group_name]["W_e"]
[docs]defD_v_neg_1_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the vertex degree matrix :math:`\mathbf{D}_v^{-1}` of the specified hyperedge group with ``torch.sparse_coo_tensor`` format.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.group_cache[group_name].get("D_v_neg_1")isNone:
- _mat=self.D_v_of_group(group_name).clone()
- _val=_mat._values()**-1
- _val[torch.isinf(_val)]=0
- self.group_cache[group_name]["D_v_neg_1"]=torch.sparse_coo_tensor(
- _mat._indices(),_val,_mat.size(),device=self.device
- ).coalesce()
- returnself.group_cache[group_name]["D_v_neg_1"]
[docs]defD_v_neg_1_2_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the vertex degree matrix :math:`\mathbf{D}_v^{-\frac{1}{2}}` of the specified hyperedge group with ``torch.sparse_coo_tensor`` format.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.group_cache[group_name].get("D_v_neg_1_2")isNone:
- _mat=self.D_v_of_group(group_name).clone()
- _val=_mat._values()**-0.5
- _val[torch.isinf(_val)]=0
- self.group_cache[group_name]["D_v_neg_1_2"]=torch.sparse_coo_tensor(
- _mat._indices(),_val,_mat.size(),device=self.device
- ).coalesce()
- returnself.group_cache[group_name]["D_v_neg_1_2"]
[docs]defD_e_neg_1_of_group(self,group_name:str)->torch.Tensor:
-r"""Return the hyperedge degree matrix :math:`\mathbf{D}_e^{-1}` of the specified hyperedge group with ``torch.sparse_coo_tensor`` format.
-
- Args:
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.group_cache[group_name].get("D_e_neg_1")isNone:
- _mat=self.D_e_of_group(group_name).clone()
- _val=_mat._values()**-1
- _val[torch.isinf(_val)]=0
- self.group_cache[group_name]["D_e_neg_1"]=torch.sparse_coo_tensor(
- _mat._indices(),_val,_mat.size(),device=self.device
- ).coalesce()
- returnself.group_cache[group_name]["D_e_neg_1"]
-
-
[docs]defN_e(self,v_idx:int)->torch.Tensor:
-r"""Return the neighbor hyperedges of the specified vertex with ``torch.Tensor`` format.
-
- .. note::
- The ``v_idx`` must be in the range of [0, :attr:`num_v`).
-
- Args:
- ``v_idx`` (``int``): The index of the vertex.
- """
- assertv_idx<self.num_v
- _tmp,e_bias=[],0
- fornameinself.group_names:
- _tmp.append(self.N_e_of_group(v_idx,name)+e_bias)
- e_bias+=self.num_e_of_group(name)
- returntorch.cat(_tmp,dim=0)
-
-
[docs]defN_e_of_group(self,v_idx:int,group_name:str)->torch.Tensor:
-r"""Return the neighbor hyperedges of the specified vertex of the specified hyperedge group with ``torch.Tensor`` format.
-
- .. note::
- The ``v_idx`` must be in the range of [0, :attr:`num_v`).
-
- Args:
- ``v_idx`` (``int``): The index of the vertex.
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- assertv_idx<self.num_v
- e_indices=self.H_of_group(group_name)[v_idx]._indices()[0]
- returne_indices.clone()
-
-
[docs]defN_v(self,e_idx:int)->torch.Tensor:
-r"""Return the neighbor vertices of the specified hyperedge with ``torch.Tensor`` format.
-
- .. note::
- The ``e_idx`` must be in the range of [0, :attr:`num_e`).
-
- Args:
- ``e_idx`` (``int``): The index of the hyperedge.
- """
- asserte_idx<self.num_e
- fornameinself.group_names:
- ife_idx<self.num_e_of_group(name):
- returnself.N_v_of_group(e_idx,name)
- else:
- e_idx-=self.num_e_of_group(name)
-
-
[docs]defN_v_of_group(self,e_idx:int,group_name:str)->torch.Tensor:
-r"""Return the neighbor vertices of the specified hyperedge of the specified hyperedge group with ``torch.Tensor`` format.
-
- .. note::
- The ``e_idx`` must be in the range of [0, :func:`num_e_of_group`).
-
- Args:
- ``e_idx`` (``int``): The index of the hyperedge.
- ``group_name`` (``str``): The name of the specified hyperedge group.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- asserte_idx<self.num_e_of_group(group_name)
- v_indices=self.H_T_of_group(group_name)[e_idx]._indices()[0]
- returnv_indices.clone()
[docs]defv2e_aggregation(
- self,
- X:torch.Tensor,
- aggr:str="mean",
- v2e_weight:Optional[torch.Tensor]=None,
- drop_rate:float=0.0,
- ):
-r"""Message aggretation step of ``vertices to hyperedges``.
-
- Args:
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``v2e_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (vertices point to hyepredges). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``drop_rate`` (``float``): Dropout rate. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. Default: ``0.0``.
- """
- assertaggrin["mean","sum","softmax_then_sum"]
- ifself.device!=X.device:
- self.to(X.device)
- ifv2e_weightisNone:
- ifdrop_rate>0.0:
- P=sparse_dropout(self.H_T,drop_rate)
- else:
- P=self.H_T
- ifaggr=="mean":
- X=torch.sparse.mm(P,X)
- X=torch.sparse.mm(self.D_e_neg_1,X)
- elifaggr=="sum":
- X=torch.sparse.mm(P,X)
- elifaggr=="softmax_then_sum":
- P=torch.sparse.softmax(P,dim=1)
- X=torch.sparse.mm(P,X)
- else:
- raiseValueError(f"Unknown aggregation method {aggr}.")
- else:
- # init message path
- assert(
- v2e_weight.shape[0]==self.v2e_weight.shape[0]
- ),"The size of v2e_weight must be equal to the size of self.v2e_weight."
- P=torch.sparse_coo_tensor(
- self.H_T._indices(),v2e_weight,self.H_T.shape,device=self.device
- )
- ifdrop_rate>0.0:
- P=sparse_dropout(P,drop_rate)
- # message passing
- ifaggr=="mean":
- X=torch.sparse.mm(P,X)
- D_e_neg_1=torch.sparse.sum(P,dim=1).to_dense().view(-1,1)
- D_e_neg_1[torch.isinf(D_e_neg_1)]=0
- X=D_e_neg_1*X
- elifaggr=="sum":
- X=torch.sparse.mm(P,X)
- elifaggr=="softmax_then_sum":
- P=torch.sparse.softmax(P,dim=1)
- X=torch.sparse.mm(P,X)
- else:
- raiseValueError(f"Unknown aggregation method {aggr}.")
- returnX
-
-
[docs]defv2e_aggregation_of_group(
- self,
- group_name:str,
- X:torch.Tensor,
- aggr:str="mean",
- v2e_weight:Optional[torch.Tensor]=None,
- drop_rate:float=0.0,
- ):
-r"""Message aggregation step of ``vertices to hyperedges`` in specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``v2e_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (vertices point to hyepredges). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``drop_rate`` (``float``): Dropout rate. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. Default: ``0.0``.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- assertaggrin["mean","sum","softmax_then_sum"]
- ifself.device!=X.device:
- self.to(X.device)
- ifv2e_weightisNone:
- ifdrop_rate>0.0:
- P=sparse_dropout(self.H_T_of_group(group_name),drop_rate)
- else:
- P=self.H_T_of_group(group_name)
- ifaggr=="mean":
- X=torch.sparse.mm(P,X)
- X=torch.sparse.mm(self.D_e_neg_1_of_group(group_name),X)
- elifaggr=="sum":
- X=torch.sparse.mm(P,X)
- elifaggr=="softmax_then_sum":
- P=torch.sparse.softmax(P,dim=1)
- X=torch.sparse.mm(P,X)
- else:
- raiseValueError(f"Unknown aggregation method {aggr}.")
- else:
- # init message path
- assert(
- v2e_weight.shape[0]==self.v2e_weight_of_group(group_name).shape[0]
- ),(
- "The size of v2e_weight must be equal to the size of"
- f" self.v2e_weight_of_group('{group_name}')."
- )
- P=torch.sparse_coo_tensor(
- self.H_T_of_group(group_name)._indices(),
- v2e_weight,
- self.H_T_of_group(group_name).shape,
- device=self.device,
- )
- ifdrop_rate>0.0:
- P=sparse_dropout(P,drop_rate)
- # message passing
- ifaggr=="mean":
- X=torch.sparse.mm(P,X)
- D_e_neg_1=torch.sparse.sum(P,dim=1).to_dense().view(-1,1)
- D_e_neg_1[torch.isinf(D_e_neg_1)]=0
- X=D_e_neg_1*X
- elifaggr=="sum":
- X=torch.sparse.mm(P,X)
- elifaggr=="softmax_then_sum":
- P=torch.sparse.softmax(P,dim=1)
- X=torch.sparse.mm(P,X)
- else:
- raiseValueError(f"Unknown aggregation method {aggr}.")
- returnX
-
-
[docs]defv2e_update(self,X:torch.Tensor,e_weight:Optional[torch.Tensor]=None):
-r"""Message update step of ``vertices to hyperedges``.
-
- Args:
- ``X`` (``torch.Tensor``): Hyperedge feature matrix. Size :math:`(|\mathcal{E}|, C)`.
- ``e_weight`` (``torch.Tensor``, optional): The hyperedge weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
- ifself.device!=X.device:
- self.to(X.device)
- ife_weightisNone:
- X=torch.sparse.mm(self.W_e,X)
- else:
- e_weight=e_weight.view(-1,1)
- assert(
- e_weight.shape[0]==self.num_e
- ),"The size of e_weight must be equal to the size of self.num_e."
- X=e_weight*X
- returnX
-
-
[docs]defv2e_update_of_group(
- self,group_name:str,X:torch.Tensor,e_weight:Optional[torch.Tensor]=None
- ):
-r"""Message update step of ``vertices to hyperedges`` in specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Hyperedge feature matrix. Size :math:`(|\mathcal{E}|, C)`.
- ``e_weight`` (``torch.Tensor``, optional): The hyperedge weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.device!=X.device:
- self.to(X.device)
- ife_weightisNone:
- X=torch.sparse.mm(self.W_e_of_group(group_name),X)
- else:
- e_weight=e_weight.view(-1,1)
- asserte_weight.shape[0]==self.num_e_of_group(group_name),(
- "The size of e_weight must be equal to the size of"
- f" self.num_e_of_group('{group_name}')."
- )
- X=e_weight*X
- returnX
-
-
[docs]defv2e(
- self,
- X:torch.Tensor,
- aggr:str="mean",
- v2e_weight:Optional[torch.Tensor]=None,
- e_weight:Optional[torch.Tensor]=None,
- drop_rate:float=0.0,
- ):
-r"""Message passing of ``vertices to hyperedges``. The combination of ``v2e_aggregation`` and ``v2e_update``.
-
- Args:
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``v2e_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (vertices point to hyepredges). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``e_weight`` (``torch.Tensor``, optional): The hyperedge weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``drop_rate`` (``float``): Dropout rate. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. Default: ``0.0``.
- """
- X=self.v2e_aggregation(X,aggr,v2e_weight,drop_rate=drop_rate)
- X=self.v2e_update(X,e_weight)
- returnX
-
-
[docs]defv2e_of_group(
- self,
- group_name:str,
- X:torch.Tensor,
- aggr:str="mean",
- v2e_weight:Optional[torch.Tensor]=None,
- e_weight:Optional[torch.Tensor]=None,
- drop_rate:float=0.0,
- ):
-r"""Message passing of ``vertices to hyperedges`` in specified hyperedge group. The combination of ``e2v_aggregation_of_group`` and ``e2v_update_of_group``.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``v2e_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (vertices point to hyepredges). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``e_weight`` (``torch.Tensor``, optional): The hyperedge weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``drop_rate`` (``float``): Dropout rate. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. Default: ``0.0``.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- X=self.v2e_aggregation_of_group(
- group_name,X,aggr,v2e_weight,drop_rate=drop_rate
- )
- X=self.v2e_update_of_group(group_name,X,e_weight)
- returnX
-
-
[docs]defe2v_aggregation(
- self,
- X:torch.Tensor,
- aggr:str="mean",
- e2v_weight:Optional[torch.Tensor]=None,
- drop_rate:float=0.0,
- ):
-r"""Message aggregation step of ``hyperedges to vertices``.
-
- Args:
- ``X`` (``torch.Tensor``): Hyperedge feature matrix. Size :math:`(|\mathcal{E}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``e2v_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (hyperedges point to vertices). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``drop_rate`` (``float``): Dropout rate. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. Default: ``0.0``.
- """
- assertaggrin["mean","sum","softmax_then_sum"]
- ifself.device!=X.device:
- self.to(X.device)
- ife2v_weightisNone:
- ifdrop_rate>0.0:
- P=sparse_dropout(self.H,drop_rate)
- else:
- P=self.H
- ifaggr=="mean":
- X=torch.sparse.mm(P,X)
- X=torch.sparse.mm(self.D_v_neg_1,X)
- elifaggr=="sum":
- X=torch.sparse.mm(P,X)
- elifaggr=="softmax_then_sum":
- P=torch.sparse.softmax(P,dim=1)
- X=torch.sparse.mm(P,X)
- else:
- raiseValueError(f"Unknown aggregation method: {aggr}")
- else:
- # init message path
- assert(
- e2v_weight.shape[0]==self.e2v_weight.shape[0]
- ),"The size of e2v_weight must be equal to the size of self.e2v_weight."
- P=torch.sparse_coo_tensor(
- self.H._indices(),e2v_weight,self.H.shape,device=self.device
- )
- ifdrop_rate>0.0:
- P=sparse_dropout(P,drop_rate)
- # message passing
- ifaggr=="mean":
- X=torch.sparse.mm(P,X)
- D_v_neg_1=torch.sparse.sum(P,dim=1).to_dense().view(-1,1)
- D_v_neg_1[torch.isinf(D_v_neg_1)]=0
- X=D_v_neg_1*X
- elifaggr=="sum":
- X=torch.sparse.mm(P,X)
- elifaggr=="softmax_then_sum":
- P=torch.sparse.softmax(P,dim=1)
- X=torch.sparse.mm(P,X)
- else:
- raiseValueError(f"Unknown aggregation method: {aggr}")
- returnX
-
-
[docs]defe2v_aggregation_of_group(
- self,
- group_name:str,
- X:torch.Tensor,
- aggr:str="mean",
- e2v_weight:Optional[torch.Tensor]=None,
- drop_rate:float=0.0,
- ):
-r"""Message aggregation step of ``hyperedges to vertices`` in specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Hyperedge feature matrix. Size :math:`(|\mathcal{E}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``e2v_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (hyperedges point to vertices). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``drop_rate`` (``float``): Dropout rate. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. Default: ``0.0``.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- assertaggrin["mean","sum","softmax_then_sum"]
- ifself.device!=X.device:
- self.to(X.device)
- ife2v_weightisNone:
- ifdrop_rate>0.0:
- P=sparse_dropout(self.H_of_group(group_name),drop_rate)
- else:
- P=self.H_of_group(group_name)
- ifaggr=="mean":
- X=torch.sparse.mm(P,X)
- X=torch.sparse.mm(self.D_v_neg_1_of_group[group_name],X)
- elifaggr=="sum":
- X=torch.sparse.mm(P,X)
- elifaggr=="softmax_then_sum":
- P=torch.sparse.softmax(P,dim=1)
- X=torch.sparse.mm(P,X)
- else:
- raiseValueError(f"Unknown aggregation method: {aggr}")
- else:
- # init message path
- assert(
- e2v_weight.shape[0]==self.e2v_weight_of_group[group_name].shape[0]
- ),(
- "The size of e2v_weight must be equal to the size of"
- f" self.e2v_weight_of_group('{group_name}')."
- )
- P=torch.sparse_coo_tensor(
- self.H_of_group[group_name]._indices(),
- e2v_weight,
- self.H_of_group[group_name].shape,
- device=self.device,
- )
- ifdrop_rate>0.0:
- P=sparse_dropout(P,drop_rate)
- # message passing
- ifaggr=="mean":
- X=torch.sparse.mm(P,X)
- D_v_neg_1=torch.sparse.sum(P,dim=1).to_dense().view(-1,1)
- D_v_neg_1[torch.isinf(D_v_neg_1)]=0
- X=D_v_neg_1*X
- elifaggr=="sum":
- X=torch.sparse.mm(P,X)
- elifaggr=="softmax_then_sum":
- P=torch.sparse.softmax(P,dim=1)
- X=torch.sparse.mm(P,X)
- else:
- raiseValueError(f"Unknown aggregation method: {aggr}")
- returnX
[docs]defe2v_update_of_group(self,group_name:str,X:torch.Tensor):
-r"""Message update step of ``hyperedges to vertices`` in specified hyperedge group.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifself.device!=X.device:
- self.to(X.device)
- returnX
-
-
[docs]defe2v(
- self,
- X:torch.Tensor,
- aggr:str="mean",
- e2v_weight:Optional[torch.Tensor]=None,
- drop_rate:float=0.0,
- ):
-r"""Message passing of ``hyperedges to vertices``. The combination of ``e2v_aggregation`` and ``e2v_update``.
-
- Args:
- ``X`` (``torch.Tensor``): Hyperedge feature matrix. Size :math:`(|\mathcal{E}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``e2v_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (hyperedges point to vertices). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``drop_rate`` (``float``): Dropout rate. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. Default: ``0.0``.
- """
- X=self.e2v_aggregation(X,aggr,e2v_weight,drop_rate=drop_rate)
- X=self.e2v_update(X)
- returnX
-
-
[docs]defe2v_of_group(
- self,
- group_name:str,
- X:torch.Tensor,
- aggr:str="mean",
- e2v_weight:Optional[torch.Tensor]=None,
- drop_rate:float=0.0,
- ):
-r"""Message passing of ``hyperedges to vertices`` in specified hyperedge group. The combination of ``e2v_aggregation_of_group`` and ``e2v_update_of_group``.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Hyperedge feature matrix. Size :math:`(|\mathcal{E}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``.
- ``e2v_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (hyperedges point to vertices). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``drop_rate`` (``float``): Dropout rate. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. Default: ``0.0``.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- X=self.e2v_aggregation_of_group(
- group_name,X,aggr,e2v_weight,drop_rate=drop_rate
- )
- X=self.e2v_update_of_group(group_name,X)
- returnX
-
-
[docs]defv2v(
- self,
- X:torch.Tensor,
- aggr:str="mean",
- drop_rate:float=0.0,
- v2e_aggr:Optional[str]=None,
- v2e_weight:Optional[torch.Tensor]=None,
- v2e_drop_rate:Optional[float]=None,
- e_weight:Optional[torch.Tensor]=None,
- e2v_aggr:Optional[str]=None,
- e2v_weight:Optional[torch.Tensor]=None,
- e2v_drop_rate:Optional[float]=None,
- ):
-r"""Message passing of ``vertices to vertices``. The combination of ``v2e`` and ``e2v``.
-
- Args:
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``. If specified, this ``aggr`` will be used to both ``v2e`` and ``e2v``.
- ``drop_rate`` (``float``): Dropout rate. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. Default: ``0.0``.
- ``v2e_aggr`` (``str``, optional): The aggregation method for hyperedges to vertices. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``. If specified, it will override the ``aggr`` in ``e2v``.
- ``v2e_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (vertices point to hyepredges). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``v2e_drop_rate`` (``float``, optional): Dropout rate for hyperedges to vertices. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. If specified, it will override the ``drop_rate`` in ``e2v``. Default: ``None``.
- ``e_weight`` (``torch.Tensor``, optional): The hyperedge weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``e2v_aggr`` (``str``, optional): The aggregation method for vertices to hyperedges. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``. If specified, it will override the ``aggr`` in ``v2e``.
- ``e2v_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (hyperedges point to vertices). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``e2v_drop_rate`` (``float``, optional): Dropout rate for vertices to hyperedges. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. If specified, it will override the ``drop_rate`` in ``v2e``. Default: ``None``.
- """
- ifv2e_aggrisNone:
- v2e_aggr=aggr
- ife2v_aggrisNone:
- e2v_aggr=aggr
- ifv2e_drop_rateisNone:
- v2e_drop_rate=drop_rate
- ife2v_drop_rateisNone:
- e2v_drop_rate=drop_rate
- X=self.v2e(X,v2e_aggr,v2e_weight,e_weight,drop_rate=v2e_drop_rate)
- X=self.e2v(X,e2v_aggr,e2v_weight,drop_rate=e2v_drop_rate)
- returnX
-
-
[docs]defv2v_of_group(
- self,
- group_name:str,
- X:torch.Tensor,
- aggr:str="mean",
- drop_rate:float=0.0,
- v2e_aggr:Optional[str]=None,
- v2e_weight:Optional[torch.Tensor]=None,
- v2e_drop_rate:Optional[float]=None,
- e_weight:Optional[torch.Tensor]=None,
- e2v_aggr:Optional[str]=None,
- e2v_weight:Optional[torch.Tensor]=None,
- e2v_drop_rate:Optional[float]=None,
- ):
-r"""Message passing of ``vertices to vertices`` in specified hyperedge group. The combination of ``v2e_of_group`` and ``e2v_of_group``.
-
- Args:
- ``group_name`` (``str``): The specified hyperedge group.
- ``X`` (``torch.Tensor``): Vertex feature matrix. Size :math:`(|\mathcal{V}|, C)`.
- ``aggr`` (``str``): The aggregation method. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``. If specified, this ``aggr`` will be used to both ``v2e_of_group`` and ``e2v_of_group``.
- ``drop_rate`` (``float``): Dropout rate. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. Default: ``0.0``.
- ``v2e_aggr`` (``str``, optional): The aggregation method for hyperedges to vertices. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``. If specified, it will override the ``aggr`` in ``e2v_of_group``.
- ``v2e_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (vertices point to hyepredges). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``v2e_drop_rate`` (``float``, optional): Dropout rate for hyperedges to vertices. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. If specified, it will override the ``drop_rate`` in ``e2v_of_group``. Default: ``None``.
- ``e_weight`` (``torch.Tensor``, optional): The hyperedge weight vector. If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``e2v_aggr`` (``str``, optional): The aggregation method for vertices to hyperedges. Can be ``'mean'``, ``'sum'`` and ``'softmax_then_sum'``. If specified, it will override the ``aggr`` in ``v2e_of_group``.
- ``e2v_weight`` (``torch.Tensor``, optional): The weight vector attached to connections (hyperedges point to vertices). If not specified, the function will use the weights specified in hypergraph construction. Defaults to ``None``.
- ``e2v_drop_rate`` (``float``, optional): Dropout rate for vertices to hyperedges. Randomly dropout the connections in incidence matrix with probability ``drop_rate``. If specified, it will override the ``drop_rate`` in ``v2e_of_group``. Default: ``None``.
- """
- assert(
- group_nameinself.group_names
- ),f"The specified {group_name} is not in existing hyperedge groups."
- ifv2e_aggrisNone:
- v2e_aggr=aggr
- ife2v_aggrisNone:
- e2v_aggr=aggr
- ifv2e_drop_rateisNone:
- v2e_drop_rate=drop_rate
- ife2v_drop_rateisNone:
- e2v_drop_rate=drop_rate
- X=self.v2e_of_group(
- group_name,X,v2e_aggr,v2e_weight,e_weight,drop_rate=v2e_drop_rate
- )
- X=self.e2v_of_group(
- group_name,X,e2v_aggr,e2v_weight,drop_rate=e2v_drop_rate
- )
- returnX
-"""Base class for MultiGraph."""
-fromcopyimportdeepcopy
-fromtypingimportDict
-fromtypingimportList
-
-importeasygraphaseg
-importeasygraph.convertasconvert
-
-fromeasygraph.classes.graphimportGraph
-fromeasygraph.utils.exceptionimportEasyGraphError
-
-
-__all__=["MultiGraph"]
-
-
-
[docs]classMultiGraph(Graph):
- edge_key_dict_factory=dict
-
- def__init__(self,incoming_graph_data=None,multigraph_input=None,**attr):
-"""Initialize a graph with edges, name, or graph attributes.
-
- Parameters
- ----------
- incoming_graph_data : input graph
- Data to initialize graph. If incoming_graph_data=None (default)
- an empty graph is created. The data can be an edge list, or any
- EasyGraph graph object. If the corresponding optional Python
- packages are installed the data can also be a NumPy matrix
- or 2d ndarray, a SciPy sparse matrix, or a PyGraphviz graph.
-
- multigraph_input : bool or None (default None)
- Note: Only used when `incoming_graph_data` is a dict.
- If True, `incoming_graph_data` is assumed to be a
- dict-of-dict-of-dict-of-dict structure keyed by
- node to neighbor to edge keys to edge data for multi-edges.
- A EasyGraphError is raised if this is not the case.
- If False, :func:`to_easygraph_graph` is used to try to determine
- the dict's graph data structure as either a dict-of-dict-of-dict
- keyed by node to neighbor to edge data, or a dict-of-iterable
- keyed by node to neighbors.
- If None, the treatment for True is tried, but if it fails,
- the treatment for False is tried.
-
- attr : keyword arguments, optional (default= no attributes)
- Attributes to add to graph as key=value pairs.
-
- See Also
- --------
- convert
-
- Examples
- --------
- >>> G = eg.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc
- >>> G = eg.Graph(name="my graph")
- >>> e = [(1, 2), (2, 3), (3, 4)] # list of edges
- >>> G = eg.Graph(e)
-
- Arbitrary graph attribute pairs (key=value) may be assigned
-
- >>> G = eg.Graph(e, day="Friday")
- >>> G.graph
- {'day': 'Friday'}
-
- """
- self.edge_key_dict_factory=self.edge_key_dict_factory
- ifisinstance(incoming_graph_data,dict)andmultigraph_inputisnotFalse:
- Graph.__init__(self)
- try:
- convert.from_dict_of_dicts(
- incoming_graph_data,create_using=self,multigraph_input=True
- )
- self.graph.update(attr)
- exceptExceptionaserr:
- ifmultigraph_inputisTrue:
- raiseeg.EasyGraphError(
- f"converting multigraph_input raised:\n{type(err)}: {err}"
- )
- Graph.__init__(self,incoming_graph_data,**attr)
- else:
- Graph.__init__(self,incoming_graph_data,**attr)
-
-
[docs]defnew_edge_key(self,u,v):
-"""Returns an unused key for edges between nodes `u` and `v`.
-
- The nodes `u` and `v` do not need to be already in the graph.
-
- Notes
- -----
- In the standard MultiGraph class the new key is the number of existing
- edges between `u` and `v` (increased if necessary to ensure unused).
- The first edge will have key 0, then 1, etc. If an edge is removed
- further new_edge_keys may not be in this order.
-
- Parameters
- ----------
- u, v : nodes
-
- Returns
- -------
- key : int
- """
- try:
- keydict=self._adj[u][v]
- exceptKeyError:
- return0
- key=len(keydict)
- whilekeyinkeydict:
- key+=1
- returnkey
-
-
[docs]defadd_edge(self,u_for_edge,v_for_edge,key=None,**attr):
-"""Add an edge between u and v.
-
- The nodes u and v will be automatically added if they are
- not already in the graph.
-
- Edge attributes can be specified with keywords or by directly
- accessing the edge's attribute dictionary. See examples below.
-
- Parameters
- ----------
- u_for_edge, v_for_edge : nodes
- Nodes can be, for example, strings or numbers.
- Nodes must be hashable (and not None) Python objects.
- key : hashable identifier, optional (default=lowest unused integer)
- Used to distinguish multiedges between a pair of nodes.
- attr : keyword arguments, optional
- Edge data (or labels or objects) can be assigned using
- keyword arguments.
-
- Returns
- -------
- The edge key assigned to the edge.
-
- See Also
- --------
- add_edges_from : add a collection of edges
-
- Notes
- -----
- To replace/update edge data, use the optional key argument
- to identify a unique edge. Otherwise a new edge will be created.
-
- EasyGraph algorithms designed for weighted graphs cannot use
- multigraphs directly because it is not clear how to handle
- multiedge weights. Convert to Graph using edge attribute
- 'weight' to enable weighted graph algorithms.
-
- Default keys are generated using the method `new_edge_key()`.
- This method can be overridden by subclassing the base class and
- providing a custom `new_edge_key()` method.
-
- Examples
- --------
- The following all add the edge e=(1, 2) to graph G:
-
- >>> G = eg.MultiGraph()
- >>> e = (1, 2)
- >>> ekey = G.add_edge(1, 2) # explicit two-node form
- >>> G.add_edge(*e) # single edge as tuple of two nodes
- 1
- >>> G.add_edges_from([(1, 2)]) # add edges from iterable container
- [2]
-
- Associate data to edges using keywords:
-
- >>> ekey = G.add_edge(1, 2, weight=3)
- >>> ekey = G.add_edge(1, 2, key=0, weight=4) # update data for key=0
- >>> ekey = G.add_edge(1, 3, weight=7, capacity=15, length=342.7)
-
- For non-string attribute keys, use subscript notation.
-
- >>> ekey = G.add_edge(1, 2)
- >>> G[1][2][0].update({0: 5})
- >>> G.edges[1, 2, 0].update({0: 5})
- """
- u,v=u_for_edge,v_for_edge
- # add nodes
- ifunotinself._adj:
- ifuisNone:
- raiseValueError("None cannot be a node")
- self._adj[u]=self.adjlist_inner_dict_factory()
- self._node[u]=self.node_attr_dict_factory()
- ifvnotinself._adj:
- ifvisNone:
- raiseValueError("None cannot be a node")
- self._adj[v]=self.adjlist_inner_dict_factory()
- self._node[v]=self.node_attr_dict_factory()
- ifkeyisNone:
- key=self.new_edge_key(u,v)
- ifvinself._adj[u]:
- keydict=self._adj[u][v]
- datadict=keydict.get(key,self.edge_attr_dict_factory())
- datadict.update(attr)
- keydict[key]=datadict
- else:
- # selfloops work this way without special treatment
- datadict=self.edge_attr_dict_factory()
- datadict.update(attr)
- keydict=self.edge_key_dict_factory()
- keydict[key]=datadict
- self._adj[u][v]=keydict
- self._adj[v][u]=keydict
- returnkey
-
-
[docs]defadd_edges_from(self,ebunch_to_add,**attr):
-"""Add all the edges in ebunch_to_add.
-
- Parameters
- ----------
- ebunch_to_add : container of edges
- Each edge given in the container will be added to the
- graph. The edges can be:
-
- - 2-tuples (u, v) or
- - 3-tuples (u, v, d) for an edge data dict d, or
- - 3-tuples (u, v, k) for not iterable key k, or
- - 4-tuples (u, v, k, d) for an edge with data and key k
-
- attr : keyword arguments, optional
- Edge data (or labels or objects) can be assigned using
- keyword arguments.
-
- Returns
- -------
- A list of edge keys assigned to the edges in `ebunch`.
-
- See Also
- --------
- add_edge : add a single edge
- add_weighted_edges_from : convenient way to add weighted edges
-
- Notes
- -----
- Adding the same edge twice has no effect but any edge data
- will be updated when each duplicate edge is added.
-
- Edge attributes specified in an ebunch take precedence over
- attributes specified via keyword arguments.
-
- Default keys are generated using the method ``new_edge_key()``.
- This method can be overridden by subclassing the base class and
- providing a custom ``new_edge_key()`` method.
-
- Examples
- --------
- >>> G = eg.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc
- >>> G.add_edges_from([(0, 1), (1, 2)]) # using a list of edge tuples
- >>> e = zip(range(0, 3), range(1, 4))
- >>> G.add_edges_from(e) # Add the path graph 0-1-2-3
-
- Associate data to edges
-
- >>> G.add_edges_from([(1, 2), (2, 3)], weight=3)
- >>> G.add_edges_from([(3, 4), (1, 4)], label="WN2898")
- """
- keylist=[]
- foreinebunch_to_add:
- ne=len(e)
- ifne==4:
- u,v,key,dd=e
- elifne==3:
- u,v,dd=e
- key=None
- elifne==2:
- u,v=e
- dd={}
- key=None
- else:
- msg=f"Edge tuple {e} must be a 2-tuple, 3-tuple or 4-tuple."
- raiseEasyGraphError(msg)
- ddd={}
- ddd.update(attr)
- try:
- ddd.update(dd)
- except(TypeError,ValueError):
- ifne!=3:
- raise
- key=dd# ne == 3 with 3rd value not dict, must be a key
- key=self.add_edge(u,v,key)
- self[u][v][key].update(ddd)
- keylist.append(key)
- returnkeylist
-
-
[docs]defremove_edge(self,u,v,key=None):
-"""Remove an edge between u and v.
-
- Parameters
- ----------
- u, v : nodes
- Remove an edge between nodes u and v.
- key : hashable identifier, optional (default=None)
- Used to distinguish multiple edges between a pair of nodes.
- If None remove a single (arbitrary) edge between u and v.
-
- Raises
- ------
- EasyGraphError
- If there is not an edge between u and v, or
- if there is no edge with the specified key.
-
- See Also
- --------
- remove_edges_from : remove a collection of edges
-
- Examples
- --------
- For multiple edges
-
- >>> G = eg.MultiGraph() # or MultiDiGraph, etc
- >>> G.add_edges_from([(1, 2), (1, 2), (1, 2)]) # key_list returned
- [0, 1, 2]
- >>> G.remove_edge(1, 2) # remove a single (arbitrary) edge
-
- For edges with keys
-
- >>> G = eg.MultiGraph() # or MultiDiGraph, etc
- >>> G.add_edge(1, 2, key="first")
- 'first'
- >>> G.add_edge(1, 2, key="second")
- 'second'
- >>> G.remove_edge(1, 2, key="second")
-
- """
- try:
- d=self._adj[u][v]
- exceptKeyErroraserr:
- raiseEasyGraphError(f"The edge {u}-{v} is not in the graph.")fromerr
- # remove the edge with specified data
- ifkeyisNone:
- d.popitem()
- else:
- try:
- deld[key]
- exceptKeyErroraserr:
- msg=f"The edge {u}-{v} with key {key} is not in the graph."
- raiseEasyGraphError(msg)fromerr
- iflen(d)==0:
- # remove the key entries if last edge
- delself._adj[u][v]
- ifu!=v:# check for selfloop
- delself._adj[v][u]
-
-
[docs]defremove_edges_from(self,ebunch):
-"""Remove all edges specified in ebunch.
-
- Parameters
- ----------
- ebunch: list or container of edge tuples
- Each edge given in the list or container will be removed
- from the graph. The edges can be:
-
- - 2-tuples (u, v) All edges between u and v are removed.
- - 3-tuples (u, v, key) The edge identified by key is removed.
- - 4-tuples (u, v, key, data) where data is ignored.
-
- See Also
- --------
- remove_edge : remove a single edge
-
- Notes
- -----
- Will fail silently if an edge in ebunch is not in the graph.
-
- Examples
- --------
- Removing multiple copies of edges
-
- >>> G = eg.MultiGraph()
- >>> keys = G.add_edges_from([(1, 2), (1, 2), (1, 2)])
- >>> G.remove_edges_from([(1, 2), (1, 2)])
- >>> list(G.edges())
- [(1, 2)]
- >>> G.remove_edges_from([(1, 2), (1, 2)]) # silently ignore extra copy
- >>> list(G.edges) # now empty graph
- []
- """
- foreinebunch:
- try:
- self.remove_edge(*e[:3])
- exceptEasyGraphError:
- pass
-
-
[docs]defhas_edge(self,u,v,key=None):
-"""Returns True if the graph has an edge between nodes u and v.
-
- This is the same as `v in G[u] or key in G[u][v]`
- without KeyError exceptions.
-
- Parameters
- ----------
- u, v : nodes
- Nodes can be, for example, strings or numbers.
-
- key : hashable identifier, optional (default=None)
- If specified return True only if the edge with
- key is found.
-
- Returns
- -------
- edge_ind : bool
- True if edge is in the graph, False otherwise.
-
- Examples
- --------
- Can be called either using two nodes u, v, an edge tuple (u, v),
- or an edge tuple (u, v, key).
-
- >>> G = eg.MultiGraph() # or MultiDiGraph
- >>> G = eg.complete_graph(4, create_using=eg.MultiDiGraph)
- >>> G.has_edge(0, 1) # using two nodes
- True
- >>> e = (0, 1)
- >>> G.has_edge(*e) # e is a 2-tuple (u, v)
- True
- >>> G.add_edge(0, 1, key="a")
- 'a'
- >>> G.has_edge(0, 1, key="a") # specify key
- True
- >>> e = (0, 1, "a")
- >>> G.has_edge(*e) # e is a 3-tuple (u, v, 'a')
- True
-
- The following syntax are equivalent:
-
- >>> G.has_edge(0, 1)
- True
- >>> 1 in G[0] # though this gives :exc:`KeyError` if 0 not in G
- True
-
- """
- try:
- ifkeyisNone:
- returnvinself._adj[u]
- else:
- returnkeyinself._adj[u][v]
- exceptKeyError:
- returnFalse
[docs]defget_edge_data(self,u,v,key=None,default=None):
-"""Returns the attribute dictionary associated with edge (u, v).
-
- This is identical to `G[u][v][key]` except the default is returned
- instead of an exception is the edge doesn't exist.
-
- Parameters
- ----------
- u, v : nodes
-
- default : any Python object (default=None)
- Value to return if the edge (u, v) is not found.
-
- key : hashable identifier, optional (default=None)
- Return data only for the edge with specified key.
-
- Returns
- -------
- edge_dict : dictionary
- The edge attribute dictionary.
-
- Examples
- --------
- >>> G = eg.MultiGraph() # or MultiDiGraph
- >>> key = G.add_edge(0, 1, key="a", weight=7)
- >>> G[0][1]["a"] # key='a'
- {'weight': 7}
- >>> G.edges[0, 1, "a"] # key='a'
- {'weight': 7}
-
- Warning: we protect the graph data structure by making
- `G.edges` and `G[1][2]` read-only dict-like structures.
- However, you can assign values to attributes in e.g.
- `G.edges[1, 2, 'a']` or `G[1][2]['a']` using an additional
- bracket as shown next. You need to specify all edge info
- to assign to the edge data associated with an edge.
-
- >>> G[0][1]["a"]["weight"] = 10
- >>> G.edges[0, 1, "a"]["weight"] = 10
- >>> G[0][1]["a"]["weight"]
- 10
- >>> G.edges[1, 0, "a"]["weight"]
- 10
-
- >>> G = eg.MultiGraph() # or MultiDiGraph
- >>> G = eg.complete_graph(4, create_using=eg.MultiDiGraph)
- >>> G.get_edge_data(0, 1)
- {0: {}}
- >>> e = (0, 1)
- >>> G.get_edge_data(*e) # tuple form
- {0: {}}
- >>> G.get_edge_data("a", "b", default=0) # edge not in graph, return 0
- 0
- """
- try:
- ifkeyisNone:
- returnself._adj[u][v]
- else:
- returnself._adj[u][v][key]
- exceptKeyError:
- returndefault
[docs]defis_multigraph(self):
-"""Returns True if graph is a multigraph, False otherwise."""
- returnTrue
-
-
[docs]defis_directed(self):
-"""Returns True if graph is directed, False otherwise."""
- returnFalse
-
-
[docs]defcopy(self):
-"""Returns a copy of the graph.
-
- The copy method by default returns an independent shallow copy
- of the graph and attributes. That is, if an attribute is a
- container, that container is shared by the original an the copy.
- Use Python's `copy.deepcopy` for new containers.
-
- Notes
- -----
- All copies reproduce the graph structure, but data attributes
- may be handled in different ways. There are four types of copies
- of a graph that people might want.
-
- Deepcopy -- A "deepcopy" copies the graph structure as well as
- all data attributes and any objects they might contain.
- The entire graph object is new so that changes in the copy
- do not affect the original object. (see Python's copy.deepcopy)
-
- Data Reference (Shallow) -- For a shallow copy the graph structure
- is copied but the edge, node and graph attribute dicts are
- references to those in the original graph. This saves
- time and memory but could cause confusion if you change an attribute
- in one graph and it changes the attribute in the other.
- EasyGraph does not provide this level of shallow copy.
-
- Independent Shallow -- This copy creates new independent attribute
- dicts and then does a shallow copy of the attributes. That is, any
- attributes that are containers are shared between the new graph
- and the original. This is exactly what `dict.copy()` provides.
- You can obtain this style copy using:
-
- >>> G = eg.path_graph(5)
- >>> H = G.copy()
- >>> H = eg.Graph(G)
- >>> H = G.__class__(G)
-
- Fresh Data -- For fresh data, the graph structure is copied while
- new empty data attribute dicts are created. The resulting graph
- is independent of the original and it has no edge, node or graph
- attributes. Fresh copies are not enabled. Instead use:
-
- >>> H = G.__class__()
- >>> H.add_nodes_from(G)
- >>> H.add_edges_from(G.edges)
-
- See the Python copy module for more information on shallow
- and deep copies, https://docs.python.org/3/library/copy.html.
-
- Returns
- -------
- G : Graph
- A copy of the graph.
-
- See Also
- --------
- to_directed: return a directed copy of the graph.
-
- Examples
- --------
- >>> G = eg.path_graph(4) # or DiGraph, MultiGraph, MultiDiGraph, etc
- >>> H = G.copy()
-
- """
- G=self.__class__()
- G.graph.update(self.graph)
- G.add_nodes_from((n,d.copy())forn,dinself._node.items())
- G.add_edges_from(
- (u,v,key,datadict.copy())
- foru,nbrsinself._adj.items()
- forv,keydictinnbrs.items()
- forkey,datadictinkeydict.items()
- )
- returnG
-
-
[docs]defto_directed(self):
-"""Returns a directed representation of the graph.
-
- Returns
- -------
- G : MultiDiGraph
- A directed graph with the same name, same nodes, and with
- each edge (u, v, data) replaced by two directed edges
- (u, v, data) and (v, u, data).
-
- Notes
- -----
- This returns a "deepcopy" of the edge, node, and
- graph attributes which attempts to completely copy
- all of the data and references.
-
- This is in contrast to the similar D=DiGraph(G) which returns a
- shallow copy of the data.
-
- See the Python copy module for more information on shallow
- and deep copies, https://docs.python.org/3/library/copy.html.
-
- Warning: If you have subclassed MultiGraph to use dict-like objects
- in the data structure, those changes do not transfer to the
- MultiDiGraph created by this method.
-
- Examples
- --------
- >>> G = eg.Graph() # or MultiGraph, etc
- >>> G.add_edge(0, 1)
- >>> H = G.to_directed()
- >>> list(H.edges)
- [(0, 1), (1, 0)]
-
- If already directed, return a (deep) copy
-
- >>> G = eg.DiGraph() # or MultiDiGraph, etc
- >>> G.add_edge(0, 1)
- >>> H = G.to_directed()
- >>> list(H.edges)
- [(0, 1)]
- """
- G=eg.MultiDiGraph()
- G.graph.update(deepcopy(self.graph))
- G.add_nodes_from((n,deepcopy(d))forn,dinself._node.items())
- G.add_edges_from(
- (u,v,key,deepcopy(datadict))
- foru,nbrsinself.adj.items()
- forv,keydictinnbrs.items()
- forkey,datadictinkeydict.items()
- )
- returnG
-
-
[docs]defnumber_of_edges(self,u=None,v=None):
-"""Returns the number of edges between two nodes.
-
- Parameters
- ----------
- u, v : nodes, optional (Gefault=all edges)
- If u and v are specified, return the number of edges between
- u and v. Otherwise return the total number of all edges.
-
- Returns
- -------
- nedges : int
- The number of edges in the graph. If nodes `u` and `v` are
- specified return the number of edges between those nodes. If
- the graph is directed, this only returns the number of edges
- from `u` to `v`.
-
- See Also
- --------
- size
-
- Examples
- --------
- For undirected multigraphs, this method counts the total number
- of edges in the graph::
-
- >>> G = eg.MultiGraph()
- >>> G.add_edges_from([(0, 1), (0, 1), (1, 2)])
- [0, 1, 0]
- >>> G.number_of_edges()
- 3
-
- If you specify two nodes, this counts the total number of edges
- joining the two nodes::
-
- >>> G.number_of_edges(0, 1)
- 2
-
- For directed multigraphs, this method can count the total number
- of directed edges from `u` to `v`::
-
- >>> G = eg.MultiDiGraph()
- >>> G.add_edges_from([(0, 1), (0, 1), (1, 0)])
- [0, 1, 0]
- >>> G.number_of_edges(0, 1)
- 2
- >>> G.number_of_edges(1, 0)
- 1
-
- """
- ifuisNone:
- returnself.size()
- try:
- edgedata=self._adj[u][v]
- exceptKeyError:
- return0# no such edge
- returnlen(edgedata)
[docs]defset_edge_attributes(G,values,name=None):
-"""Sets edge attributes from a given value or dictionary of values.
-
- .. Warning:: The call order of arguments `values` and `name`
- switched between v1.x & v2.x.
-
- Parameters
- ----------
- G : EasyGraph Graph
-
- values : scalar value, dict-like
- What the edge attribute should be set to. If `values` is
- not a dictionary, then it is treated as a single attribute value
- that is then applied to every edge in `G`. This means that if
- you provide a mutable object, like a list, updates to that object
- will be reflected in the edge attribute for each edge. The attribute
- name will be `name`.
-
- If `values` is a dict or a dict of dict, it should be keyed
- by edge tuple to either an attribute value or a dict of attribute
- key/value pairs used to update the edge's attributes.
- For multigraphs, the edge tuples must be of the form ``(u, v, key)``,
- where `u` and `v` are nodes and `key` is the edge key.
- For non-multigraphs, the keys must be tuples of the form ``(u, v)``.
-
- name : string (optional, default=None)
- Name of the edge attribute to set if values is a scalar.
-
- Examples
- --------
- After computing some property of the edges of a graph, you may want
- to assign a edge attribute to store the value of that property for
- each edge::
-
- >>> G = eg.path_graph(3)
- >>> bb = eg.edge_betweenness_centrality(G, normalized=False)
- >>> eg.set_edge_attributes(G, bb, "betweenness")
- >>> G.edges[1, 2]["betweenness"]
- 2.0
-
- If you provide a list as the second argument, updates to the list
- will be reflected in the edge attribute for each edge::
-
- >>> labels = []
- >>> eg.set_edge_attributes(G, labels, "labels")
- >>> labels.append("foo")
- >>> G.edges[0, 1]["labels"]
- ['foo']
- >>> G.edges[1, 2]["labels"]
- ['foo']
-
- If you provide a dictionary of dictionaries as the second argument,
- the entire dictionary will be used to update edge attributes::
-
- >>> G = eg.path_graph(3)
- >>> attrs = {(0, 1): {"attr1": 20, "attr2": "nothing"}, (1, 2): {"attr2": 3}}
- >>> eg.set_edge_attributes(G, attrs)
- >>> G[0][1]["attr1"]
- 20
- >>> G[0][1]["attr2"]
- 'nothing'
- >>> G[1][2]["attr2"]
- 3
-
- Note that if the dict contains edges that are not in `G`, they are
- silently ignored::
-
- >>> G = eg.Graph([(0, 1)])
- >>> eg.set_edge_attributes(G, {(1, 2): {"weight": 2.0}})
- >>> (1, 2) in G.edges()
- False
-
- """
- ifnameisnotNone:
- # `values` does not contain attribute names
- try:
- # if `values` is a dict using `.items()` => {edge: value}
- ifG.is_multigraph():
- for(u,v,key),valueinvalues.items():
- try:
- G[u][v][key][name]=value
- exceptKeyError:
- pass
- else:
- for(u,v),valueinvalues.items():
- try:
- G[u][v][name]=value
- exceptKeyError:
- pass
- exceptAttributeError:
- # treat `values` as a constant
- foru,v,datainG.edges:
- data[name]=values
- else:
- # `values` consists of doct-of-dict {edge: {attr: value}} shape
- ifG.is_multigraph():
- for(u,v,key),dinvalues.items():
- try:
- G[u][v][key].update(d)
- exceptKeyError:
- pass
- else:
- for(u,v),dinvalues.items():
- try:
- G[u][v].update(d)
- exceptKeyError:
- pass
-
-
-
[docs]defadd_path(G_to_add_to,nodes_for_path,**attr):
-"""Add a path to the Graph G_to_add_to.
-
- Parameters
- ----------
- G_to_add_to : graph
- A EasyGraph graph
- nodes_for_path : iterable container
- A container of nodes. A path will be constructed from
- the nodes (in order) and added to the graph.
- attr : keyword arguments, optional (default= no attributes)
- Attributes to add to every edge in path.
-
- See Also
- --------
- add_star, add_cycle
-
- Examples
- --------
- >>> G = eg.Graph()
- >>> eg.add_path(G, [0, 1, 2, 3])
- >>> eg.add_path(G, [10, 11, 12], weight=7)
- """
- nlist=iter(nodes_for_path)
- try:
- first_node=next(nlist)
- exceptStopIteration:
- return
- G_to_add_to.add_node(first_node)
- G_to_add_to.add_edges_from(pairwise(chain((first_node,),nlist)),**attr)
-
-
-
[docs]defset_node_attributes(G,values,name=None):
-"""Sets node attributes from a given value or dictionary of values.
-
- .. Warning:: The call order of arguments `values` and `name`
- switched between v1.x & v2.x.
-
- Parameters
- ----------
- G : EasyGraph Graph
-
- values : scalar value, dict-like
- What the node attribute should be set to. If `values` is
- not a dictionary, then it is treated as a single attribute value
- that is then applied to every node in `G`. This means that if
- you provide a mutable object, like a list, updates to that object
- will be reflected in the node attribute for every node.
- The attribute name will be `name`.
-
- If `values` is a dict or a dict of dict, it should be keyed
- by node to either an attribute value or a dict of attribute key/value
- pairs used to update the node's attributes.
-
- name : string (optional, default=None)
- Name of the node attribute to set if values is a scalar.
-
- Examples
- --------
- After computing some property of the nodes of a graph, you may want
- to assign a node attribute to store the value of that property for
- each node::
-
- >>> G = eg.path_graph(3)
- >>> bb = eg.betweenness_centrality(G)
- >>> isinstance(bb, dict)
- True
- >>> eg.set_node_attributes(G, bb, "betweenness")
- >>> G.nodes[1]["betweenness"]
- 1.0
-
- If you provide a list as the second argument, updates to the list
- will be reflected in the node attribute for each node::
-
- >>> G = eg.path_graph(3)
- >>> labels = []
- >>> eg.set_node_attributes(G, labels, "labels")
- >>> labels.append("foo")
- >>> G.nodes[0]["labels"]
- ['foo']
- >>> G.nodes[1]["labels"]
- ['foo']
- >>> G.nodes[2]["labels"]
- ['foo']
-
- If you provide a dictionary of dictionaries as the second argument,
- the outer dictionary is assumed to be keyed by node to an inner
- dictionary of node attributes for that node::
-
- >>> G = eg.path_graph(3)
- >>> attrs = {0: {"attr1": 20, "attr2": "nothing"}, 1: {"attr2": 3}}
- >>> eg.set_node_attributes(G, attrs)
- >>> G.nodes[0]["attr1"]
- 20
- >>> G.nodes[0]["attr2"]
- 'nothing'
- >>> G.nodes[1]["attr2"]
- 3
- >>> G.nodes[2]
- {}
-
- Note that if the dictionary contains nodes that are not in `G`, the
- values are silently ignored::
-
- >>> G = eg.Graph()
- >>> G.add_node(0)
- >>> eg.set_node_attributes(G, {0: "red", 1: "blue"}, name="color")
- >>> G.nodes[0]["color"]
- 'red'
- >>> 1 in G.nodes
- False
-
- """
- # Set node attributes based on type of `values`
- ifnameisnotNone:# `values` must not be a dict of dict
- try:# `values` is a dict
- forn,vinvalues.items():
- try:
- G.nodes[n][name]=values[n]
- exceptKeyError:
- pass
- exceptAttributeError:# `values` is a constant
- forninG:
- G.nodes[n][name]=values
- else:# `values` must be dict of dict
- forn,dinvalues.items():
- try:
- G.nodes[n].update(d)
- exceptKeyError:
- pass
-
-
-deftopological_generations(G):
- ifnotG.is_directed():
- raiseAssertionError("Topological sort not defined on undirected graphs.")
- indegree_map={v:dforv,dinG.in_degree()ifd>0}
- zero_indegree=[vforv,dinG.in_degree()ifd==0]
- whilezero_indegree:
- this_generation=zero_indegree
- zero_indegree=[]
- fornodeinthis_generation:
- ifnodenotinG:
- raiseRuntimeError("Graph changed during iteration")
- forchildinG.neighbors(node):
- try:
- indegree_map[child]-=1
- exceptKeyErroraserr:
- raiseRuntimeError("Graph changed during iteration")fromerr
- ifindegree_map[child]==0:
- zero_indegree.append(child)
- delindegree_map[child]
- yieldthis_generation
-
- ifindegree_map:
- raiseAssertionError("Graph contains a cycle or graph changed during iteration")
-
-
-
[docs]defnumber_of_selfloops(G):
-"""Returns the number of selfloop edges.
-
- A selfloop edge has the same node at both ends.
-
- Returns
- -------
- nloops : int
- The number of selfloops.
-
- See Also
- --------
- nodes_with_selfloops, selfloop_edges
-
- Examples
- --------
- >>> G = eg.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc
- >>> G.add_edge(1, 1)
- >>> G.add_edge(1, 2)
- >>> eg.number_of_selfloops(G)
- 1
- """
- returnsum(1for_ineg.selfloop_edges(G))
-
-
-
[docs]defselfloop_edges(G,data=False,keys=False,default=None):
-"""Returns an iterator over selfloop edges.
-
- A selfloop edge has the same node at both ends.
-
- Parameters
- ----------
- G : graph
- A EasyGraph graph.
- data : string or bool, optional (default=False)
- Return selfloop edges as two tuples (u, v) (data=False)
- or three-tuples (u, v, datadict) (data=True)
- or three-tuples (u, v, datavalue) (data='attrname')
- keys : bool, optional (default=False)
- If True, return edge keys with each edge.
- default : value, optional (default=None)
- Value used for edges that don't have the requested attribute.
- Only relevant if data is not True or False.
-
- Returns
- -------
- edgeiter : iterator over edge tuples
- An iterator over all selfloop edges.
-
- See Also
- --------
- nodes_with_selfloops, number_of_selfloops
-
- Examples
- --------
- >>> G = eg.MultiGraph() # or Graph, DiGraph, MultiDiGraph, etc
- >>> ekey = G.add_edge(1, 1)
- >>> ekey = G.add_edge(1, 2)
- >>> list(eg.selfloop_edges(G))
- [(1, 1)]
- >>> list(eg.selfloop_edges(G, data=True))
- [(1, 1, {})]
- >>> list(eg.selfloop_edges(G, keys=True))
- [(1, 1, 0)]
- >>> list(eg.selfloop_edges(G, keys=True, data=True))
- [(1, 1, 0, {})]
- """
- ifdataisTrue:
- ifG.is_multigraph():
- ifkeysisTrue:
- return(
- (n,n,k,d)
- forn,nbrsinG.adj.items()
- ifninnbrs
- fork,dinnbrs[n].items()
- )
- else:
- return(
- (n,n,d)
- forn,nbrsinG.adj.items()
- ifninnbrs
- fordinnbrs[n].values()
- )
- else:
- return((n,n,nbrs[n])forn,nbrsinG.adj.items()ifninnbrs)
- elifdataisnotFalse:
- ifG.is_multigraph():
- ifkeysisTrue:
- return(
- (n,n,k,d.get(data,default))
- forn,nbrsinG.adj.items()
- ifninnbrs
- fork,dinnbrs[n].items()
- )
- else:
- return(
- (n,n,d.get(data,default))
- forn,nbrsinG.adj.items()
- ifninnbrs
- fordinnbrs[n].values()
- )
- else:
- return(
- (n,n,nbrs[n].get(data,default))
- forn,nbrsinG.adj.items()
- ifninnbrs
- )
- else:
- ifG.is_multigraph():
- ifkeysisTrue:
- return(
- (n,n,k)forn,nbrsinG.adj.items()ifninnbrsforkinnbrs[n]
- )
- else:
- return(
- (n,n)
- forn,nbrsinG.adj.items()
- ifninnbrs
- foriinrange(len(nbrs[n]))# for easy edge removal (#4068)
- )
- else:
- return((n,n)forn,nbrsinG.adj.items()ifninnbrs)
-
-
-@hybrid("cpp_density")
-defdensity(G):
-r"""Returns the density of a graph.
-
- The density for undirected graphs is
-
- .. math::
-
- d = \frac{2m}{n(n-1)},
-
- and for directed graphs is
-
- .. math::
-
- d = \frac{m}{n(n-1)},
-
- where `n` is the number of nodes and `m` is the number of edges in `G`.
-
- Notes
- -----
- The density is 0 for a graph without edges and 1 for a complete graph.
- The density of multigraphs can be higher than 1.
-
- Self loops are counted in the total number of edges so graphs with self
- loops can have density higher than 1.
- """
- n=G.number_of_nodes()
- m=G.number_of_edges()
- ifm==0orn<=1:
- return0
- d=m/(n*(n-1))
- ifnotG.is_directed():
- d*=2
- returnd
-
[docs]defto_easygraph_graph(data,create_using=None,multigraph_input=False):
-"""Make a EasyGraph graph from a known data structure.
-
- The preferred way to call this is automatically
- from the class constructor
-
- >>> d = {0: {1: {"weight": 1}}} # dict-of-dicts single edge (0,1)
- >>> G = eg.Graph(d)
-
- instead of the equivalent
-
- >>> G = eg.from_dict_of_dicts(d)
-
- Parameters
- ----------
- data : object to be converted
-
- Current known types are:
- any EasyGraph graph
- dict-of-dicts
- dict-of-lists
- container (e.g. set, list, tuple) of edges
- iterator (e.g. itertools.chain) that produces edges
- generator of edges
- Pandas DataFrame (row per edge)
- numpy matrix
- numpy ndarray
- scipy sparse matrix
- pygraphviz agraph
-
- create_using : EasyGraph graph constructor, optional (default=eg.Graph)
- Graph type to create. If graph instance, then cleared before populated.
-
- multigraph_input : bool (default False)
- If True and data is a dict_of_dicts,
- try to create a multigraph assuming dict_of_dict_of_lists.
- If data and create_using are both multigraphs then create
- a multigraph from a multigraph.
-
- """
-
- # EasyGraph graph type
- ifhasattr(data,"adj"):
- try:
- result=from_dict_of_dicts(
- data.adj,
- create_using=create_using,
- multigraph_input=data.is_multigraph(),
- )
- # data.graph should be dict-like
- result.graph.update(data.graph)
- # data.nodes should be dict-like
- # result.add_node_from(data.nodes.items()) possible but
- # for custom node_attr_dict_factory which may be hashable
- # will be unexpected behavior
- forn,ddindata.nodes.items():
- result._node[n].update(dd)
- returnresult
- exceptExceptionaserr:
- raiseeg.EasyGraphError("Input is not a correct EasyGraph graph.")fromerr
-
- # pygraphviz agraph
- ifhasattr(data,"is_strict"):
- try:
- returneg.from_pyGraphviz_agraph(data,create_using=create_using)
- exceptExceptionaserr:
- raiseeg.EasyGraphError("Input is not a correct pygraphviz graph.")fromerr
-
- # dict of dicts/lists
- ifisinstance(data,dict):
- try:
- returnfrom_dict_of_dicts(
- data,create_using=create_using,multigraph_input=multigraph_input
- )
- exceptExceptionaserr:
- ifmultigraph_inputisTrue:
- raiseeg.EasyGraphError(
- f"converting multigraph_input raised:\n{type(err)}: {err}"
- )
- try:
- returnfrom_dict_of_lists(data,create_using=create_using)
- exceptExceptionaserr:
- raiseTypeError("Input is not known type.")fromerr
-
- # Pandas DataFrame
- try:
- importpandasaspd
-
- ifisinstance(data,pd.DataFrame):
- ifdata.shape[0]==data.shape[1]:
- try:
- returneg.from_pandas_adjacency(data,create_using=create_using)
- exceptExceptionaserr:
- msg="Input is not a correct Pandas DataFrame adjacency matrix."
- raiseeg.EasyGraphError(msg)fromerr
- else:
- try:
- returneg.from_pandas_edgelist(
- data,edge_attr=True,create_using=create_using
- )
- exceptExceptionaserr:
- msg="Input is not a correct Pandas DataFrame adjacency edge-list."
- raiseeg.EasyGraphError(msg)fromerr
- exceptImportError:
- warnings.warn("pandas not found, skipping conversion test.",ImportWarning)
-
- # numpy matrix or ndarray
- try:
- importnumpyasnp
-
- ifisinstance(data,np.ndarray):
- try:
- returneg.from_numpy_array(data,create_using=create_using)
- exceptExceptionaserr:
- raiseeg.EasyGraphError(
- "Input is not a correct numpy matrix or array."
- )fromerr
- exceptImportError:
- warnings.warn("numpy not found, skipping conversion test.",ImportWarning)
-
- # scipy sparse matrix - any format
- try:
- ifhasattr(data,"format"):
- try:
- returneg.from_scipy_sparse_matrix(data,create_using=create_using)
- exceptExceptionaserr:
- raiseeg.EasyGraphError(
- "Input is not a correct scipy sparse matrix type."
- )fromerr
- exceptImportError:
- warnings.warn("scipy not found, skipping conversion test.",ImportWarning)
-
- # Note: most general check - should remain last in order of execution
- # Includes containers (e.g. list, set, dict, etc.), generators, and
- # iterators (e.g. itertools.chain) of edges
-
- ifisinstance(data,(Collection,Generator,Iterator)):
- try:
- returnfrom_edgelist(data,create_using=create_using)
- exceptExceptionaserr:
- raiseeg.EasyGraphError("Input is not a valid edge list")fromerr
-
- raiseeg.EasyGraphError("Input is not a known data type for conversion.")
-
-
-
[docs]deffrom_dict_of_lists(d,create_using=None):
- G=eg.empty_graph(0,create_using)
- G.add_nodes_from(d)
- ifG.is_multigraph()andnotG.is_directed():
- # a dict_of_lists can't show multiedges. BUT for undirected graphs,
- # each edge shows up twice in the dict_of_lists.
- # So we need to treat this case separately.
- seen={}
- fornode,nbrlistind.items():
- fornbrinnbrlist:
- ifnbrnotinseen:
- G.add_edge(node,nbr)
- seen[node]=1# don't allow reverse edge to show up
- else:
- G.add_edges_from(
- ((node,nbr)fornode,nbrlistind.items()fornbrinnbrlist)
- )
- returnG
-
-
-
[docs]deffrom_dict_of_dicts(d,create_using=None,multigraph_input=False):
- G=eg.empty_graph(0,create_using)
- G.add_nodes_from(d)
- # does dict d represent a MultiGraph or MultiDiGraph?
- ifmultigraph_input:
- ifG.is_directed():
- ifG.is_multigraph():
- G.add_edges_from(
- (u,v,key,data)
- foru,nbrsind.items()
- forv,datadictinnbrs.items()
- forkey,dataindatadict.items()
- )
- else:
- G.add_edges_from(
- (u,v,data)
- foru,nbrsind.items()
- forv,datadictinnbrs.items()
- forkey,dataindatadict.items()
- )
- else:# Undirected
- ifG.is_multigraph():
- seen=set()# don't add both directions of undirected graph
- foru,nbrsind.items():
- forv,datadictinnbrs.items():
- if(u,v)notinseen:
- G.add_edges_from(
- (u,v,key,data)forkey,dataindatadict.items()
- )
- seen.add((v,u))
- else:
- seen=set()# don't add both directions of undirected graph
- foru,nbrsind.items():
- forv,datadictinnbrs.items():
- if(u,v)notinseen:
- G.add_edges_from(
- (u,v,data)forkey,dataindatadict.items()
- )
- seen.add((v,u))
-
- else:# not a multigraph to multigraph transfer
- ifG.is_multigraph()andnotG.is_directed():
- # d can have both representations u-v, v-u in dict. Only add one.
- # We don't need this check for digraphs since we add both directions,
- # or for Graph() since it is done implicitly (parallel edges not allowed)
- seen=set()
- foru,nbrsind.items():
- forv,datainnbrs.items():
- if(u,v)notinseen:
- G.add_edge(u,v,key=0)
- G[u][v][0].update(data)
- seen.add((v,u))
- else:
- G.add_edges_from(
- ((u,v,data)foru,nbrsind.items()forv,datainnbrs.items())
- )
- returnG
-
-
-
[docs]deffrom_edgelist(edgelist,create_using=None):
-"""Returns a graph from a list of edges.
-
- Parameters
- ----------
- edgelist : list or iterator
- Edge tuples
-
- create_using : EasyGraph graph constructor, optional (default=eg.Graph)
- Graph type to create. If graph instance, then cleared before populated.
-
- Examples
- --------
- >>> edgelist = [(0, 1)] # single edge (0,1)
- >>> G = eg.from_edgelist(edgelist)
-
- or
-
- >>> G = eg.Graph(edgelist) # use Graph constructor
-
- """
- G=eg.empty_graph(0,create_using)
- G.add_edges_from(edgelist)
- returnG
-
-
-
[docs]defto_networkx(g:"Union[Graph, DiGraph]")->"Union[nx.Graph, nx.DiGraph]":
-"""Convert an EasyGraph to a NetworkX graph.
-
- Args:
- g (Union[Graph, DiGraph]): An EasyGraph graph
-
- Raises:
- ImportError is raised if NetworkX is not installed.
-
- Returns:
- Union[nx.Graph, nx.DiGraph]: Converted NetworkX graph
- """
- # if load_func_name in di_load_functions_name:
- try:
- importnetworkxasnx
- exceptImportError:
- raiseImportError("NetworkX not found. Please install it.")
- ifg.is_directed():
- G=nx.DiGraph()
- else:
- G=nx.Graph()
-
- # copy attributes
- G.graph=deepcopy(g.graph)
-
- nodes_with_edges=set()
- forv1,v2,_ing.edges:
- G.add_edge(v1,v2)
- nodes_with_edges.add(v1)
- nodes_with_edges.add(v2)
- fornodeinset(g.nodes)-nodes_with_edges:
- G.add_node(node)
- returnG
[docs]defto_dgl(g:"Union[Graph, DiGraph]"):
-"""Convert an EasyGraph graph to a DGL graph.
-
- Args:
- g (Union[Graph, DiGraph]): An EasyGraph graph
-
- Raises:
- ImportError: If DGL is not installed.
-
- Returns:
- DGLGraph: Converted DGL graph
- """
- try:
- importdgl
- exceptImportError:
- raiseImportError("DGL not found. Please install it.")
- g_nx=to_networkx(g)
- g_dgl=dgl.from_networkx(g_nx)
- returng_dgl
-
-
-
[docs]deffrom_dgl(g)->"Union[Graph, DiGraph]":
-"""Convert a DGL graph to an EasyGraph graph.
-
- Args:
- g (DGLGraph): A DGL graph
-
- Raises:
- ImportError: If DGL is not installed.
-
- Returns:
- Union[Graph, DiGraph]: Converted EasyGraph graph
- """
- try:
- importdgl
- exceptImportError:
- raiseImportError("DGL not found. Please install it.")
- g_nx=dgl.to_networkx(g)
- g_eg=from_networkx(g_nx)
- returng_eg
-
-
-
[docs]defto_pyg(
- G:Any,
- group_node_attrs:Optional[Union[List[str],all]]=None,# type: ignore
- group_edge_attrs:Optional[Union[List[str],all]]=None,# type: ignore
-)->"torch_geometric.data.Data":# type: ignore
-r"""Converts a :obj:`easygraph.Graph` or :obj:`easygraph.DiGraph` to a
- :class:`torch_geometric.data.Data` instance.
-
- Args:
- G (easygraph.Graph or easygraph.DiGraph): A easygraph graph.
- group_node_attrs (List[str] or all, optional): The node attributes to
- be concatenated and added to :obj:`data.x`. (default: :obj:`None`)
- group_edge_attrs (List[str] or all, optional): The edge attributes to
- be concatenated and added to :obj:`data.edge_attr`.
- (default: :obj:`None`)
-
- .. note::
-
- All :attr:`group_node_attrs` and :attr:`group_edge_attrs` values must
- be numeric.
-
- Examples:
-
- >>> import torch_geometric as pyg
-
- >>> pyg_to_networkx = pyg.utils.convert.to_networkx # type: ignore
- >>> networkx_to_pyg = pyg.utils.convert.from_networkx # type: ignore
- >>> Data = pyg.data.Data # type: ignore
- >>> edge_index = torch.tensor([
- ... [0, 1, 1, 2, 2, 3],
- ... [1, 0, 2, 1, 3, 2],
- ... ])
- >>> data = Data(edge_index=edge_index, num_nodes=4)
- >>> g = pyg_to_networkx(data)
- >>> # A `Data` object is returned
- >>> to_pyg(g)
- Data(edge_index=[2, 6], num_nodes=4)
- """
- try:
- importtorch_geometricaspyg
-
- pyg_to_networkx=pyg.utils.convert.to_networkx# type: ignore
- networkx_to_pyg=pyg.utils.convert.from_networkx# type: ignore
- exceptImportError:
- raiseImportError("pytorch_geometric not found. Please install it.")
-
- g_nx=to_networkx(G)
- g_pyg=networkx_to_pyg(g_nx,group_node_attrs,group_edge_attrs)
- returng_pyg
-
-
-
[docs]deffrom_pyg(
- data:"torch_geometric.data.Data",# type: ignore
- node_attrs:Optional[Iterable[str]]=None,
- edge_attrs:Optional[Iterable[str]]=None,
- graph_attrs:Optional[Iterable[str]]=None,
- to_undirected:Optional[Union[bool,str]]=False,
- remove_self_loops:bool=False,
-)->Any:
-r"""Converts a :class:`torch_geometric.data.Data` instance to a
- :obj:`easygraph.Graph` if :attr:`to_undirected` is set to :obj:`True`, or
- a directed :obj:`easygraph.DiGraph` otherwise.
-
- Args:
- data (torch_geometric.data.Data): The data object.
- node_attrs (iterable of str, optional): The node attributes to be
- copied. (default: :obj:`None`)
- edge_attrs (iterable of str, optional): The edge attributes to be
- copied. (default: :obj:`None`)
- graph_attrs (iterable of str, optional): The graph attributes to be
- copied. (default: :obj:`None`)
- to_undirected (bool or str, optional): If set to :obj:`True` or
- "upper", will return a :obj:`easygraph.Graph` instead of a
- :obj:`easygraph.DiGraph`. The undirected graph will correspond to
- the upper triangle of the corresponding adjacency matrix.
- Similarly, if set to "lower", the undirected graph will correspond
- to the lower triangle of the adjacency matrix. (default:
- :obj:`False`)
- remove_self_loops (bool, optional): If set to :obj:`True`, will not
- include self loops in the resulting graph. (default: :obj:`False`)
-
- Examples:
-
- >>> import torch_geometric as pyg
-
- >>> Data = pyg.data.Data # type: ignore
- >>> edge_index = torch.tensor([
- ... [0, 1, 1, 2, 2, 3],
- ... [1, 0, 2, 1, 3, 2],
- ... ])
- >>> data = Data(edge_index=edge_index, num_nodes=4)
- >>> from_pyg(data)
- <easygraph.classes.digraph.DiGraph at 0x2713fdb40d0>
-
- """
-
- try:
- importtorch_geometricaspyg
-
- pyg_to_networkx=pyg.utils.convert.to_networkx# type: ignore
- networkx_to_pyg=pyg.utils.convert.from_networkx# type: ignore
- exceptImportError:
- raiseImportError("pytorch_geometric not found. Please install it.")
- g_nx=pyg_to_networkx(
- data,node_attrs,edge_attrs,graph_attrs,to_undirected,remove_self_loops
- )
- g_eg=from_networkx(g_nx)
- returng_eg
[docs]defload_from_pickle(
- file_path:Path,keys:Optional[Union[str,List[str]]]=None,**kwargs
-):
-r"""Load data from a pickle file.
-
- Args:
- ``file_path`` (``Path``): The local path of the file.
- ``keys`` (``Union[str, List[str]]``, optional): The keys of the data. Defaults to ``None``.
- """
- ifisinstance(file_path,list):
- raiseValueError("This function only support loading data from a single file.")
- withopen(file_path,"rb")asf:
- data=pkl.load(f,**kwargs)
- ifkeysisNone:
- returndata
- elifisinstance(keys,str):
- returndata[keys]
- else:
- return{key:data[key]forkeyinkeys}
-
-
-
[docs]defload_from_json(file_path:Path,**kwargs):
-r"""Load data from a json file.
-
- Args:
- ``file_path`` (``Path``): The local path of the file.
- """
- withopen(file_path,"r")asf:
- data=json.load(f,**kwargs)
- returndata
-
-
-
[docs]defload_from_txt(
- file_path:Path,
- dtype:Union[str,Callable],
- sep:str=",| |\t",
- ignore_header:int=0,
-):
-r"""Load data from a txt file.
-
- .. note::
- The separator is a regular expression of ``re`` module. Multiple separators can be separated by ``|``. More details can refer to `re.split <https://docs.python.org/3/library/re.html#re.split>`_.
-
- Args:
- ``file_path`` (``Path``): The local path of the file.
- ``dtype`` (``Union[str, Callable]``): The data type of the data can be either a string or a callable function.
- ``sep`` (``str``, optional): The separator of each line in the file. Defaults to ``",| |\t"``.
- ``ignore_header`` (``int``, optional): The number of lines to ignore in the header of the file. Defaults to ``0``.
- """
- cast_fun=ret_cast_fun(dtype)
- file_path=Path(file_path)
- assertfile_path.exists(),f"{file_path} does not exist."
- data=[]
- withopen(file_path,"r")asf:
- for_inrange(ignore_header):
- f.readline()
- data=[
- list(map(cast_fun,re.split(sep,line.strip())))forlineinf.readlines()
- ]
- returndata
-
-
-
[docs]defret_cast_fun(dtype:Union[str,Callable]):
-r"""Return the cast function of the data type. The supported data types are: ``int``, ``float``, ``str``.
-
- Args:
- ``dtype`` (``Union[str, Callable]``): The data type of the data can be either a string or a callable function.
- """
- ifisinstance(dtype,str):
- ifdtype=="int":
- returnint
- elifdtype=="float":
- returnfloat
- elifdtype=="str":
- returnstr
- else:
- raiseValueError("dtype must be one of 'int', 'float', 'str'.")
- else:
- returndtype
[docs]defnorm_ft(X:torch.Tensor,ord:Optional[Union[int,float]]=None)->torch.Tensor:
-r"""Normalize the input feature matrix with specified ``ord`` refer to pytorch's `torch.linalg.norm <https://pytorch.org/docs/stable/generated/torch.linalg.norm.html#torch.linalg.norm>`_ function.
-
- .. note::
- The input feature matrix is expected to be a 1D vector or a 2D tensor with shape (num_samples, num_features).
-
- Args:
- ``X`` (``torch.Tensor``): The input feature.
- ``ord`` (``Union[int, float]``, optional): The order of the norm can be either an ``int``, ``float``. If ``ord`` is ``None``, the norm is computed with the 2-norm. Defaults to ``None``.
-
- Examples:
- >>> import easygraph.datapipe as dd
- >>> import torch
- >>> X = torch.tensor([
- [0.1, 0.2, 0.5],
- [0.5, 0.2, 0.3],
- [0.3, 0.2, 0]
- ])
- >>> dd.norm_ft(X)
- tensor([[0.1826, 0.3651, 0.9129],
- [0.8111, 0.3244, 0.4867],
- [0.8321, 0.5547, 0.0000]])
- """
- ifX.dim()==1:
- X_norm=1/torch.linalg.norm(X,ord=ord)
- X_norm[torch.isinf(X_norm)]=0
- returnX*X_norm
- elifX.dim()==2:
- X_norm=1/torch.linalg.norm(X,ord=ord,dim=1,keepdim=True)
- X_norm[torch.isinf(X_norm)]=0
- returnX*X_norm
- else:
- raiseValueError(
- "The input feature matrix is expected to be a 1D verter or a 2D tensor with"
- " shape (num_samples, num_features)."
- )
-
-
-
[docs]defmin_max_scaler(X:torch.Tensor,ft_min:float,ft_max:float)->torch.Tensor:
-r"""Normalize the input feature matrix with min-max scaling.
-
- Args:
- ``X`` (``torch.Tensor``): The input feature.
- ``ft_min`` (``float``): The minimum value of the output feature.
- ``ft_max`` (``float``): The maximum value of the output feature.
-
- Examples:
- >>> import easygraph.datapipe as dd
- >>> import torch
- >>> X = torch.tensor([
- [0.1, 0.2, 0.5],
- [0.5, 0.2, 0.3],
- [0.3, 0.2, 0.0]
- ])
- >>> dd.min_max_scaler(X, -1, 1)
- tensor([[-0.6000, -0.2000, 1.0000],
- [ 1.0000, -0.2000, 0.2000],
- [ 0.2000, -0.2000, -1.0000]])
- """
- assert(
- ft_min<ft_max
- ),"The minimum value of the feature should be less than the maximum value."
- X_min,X_max=X.min().item(),X.max().item()
- X_range=X_max-X_min
- scale_=(ft_max-ft_min)/X_range
- min_=ft_min-X_min*scale_
- X=X*scale_+min_
- returnX
[docs]classCitationGraphDataset(EasyGraphBuiltinDataset):
-r"""The citation graph dataset, including Cora, CiteSeer and PubMed.
- Nodes mean authors and edges mean citation relationships.
-
- Parameters
- -----------
- name: str
- name can be 'Cora', 'CiteSeer' or 'PubMed'.
- raw_dir : str
- Raw file directory to download/contains the input data directory.
- Default: ~/.dgl/
- force_reload : bool
- Whether to reload the dataset. Default: False
- verbose : bool
- Whether to print out progress information. Default: True.
- reverse_edge : bool
- Whether to add reverse edges in graph. Default: True.
- transform : callable, optional
- A transform that takes in a :class:`~eg.Graph` object and returns
- a transformed version. The :class:`~eg.Graph` object will be
- transformed before every access.
- reorder : bool
- Whether to reorder the graph using :func:`~eg.reorder_graph`. Default: False.
- """
- _urls={
- "cora_v2":"dataset/cora_v2.zip",
- "citeseer":"dataset/citeSeer.zip",
- "pubmed":"dataset/pubmed.zip",
- }
-
- def__init__(
- self,
- name,
- raw_dir=None,
- force_reload=False,
- verbose=True,
- reverse_edge=True,
- transform=None,
- reorder=False,
- ):
- assertname.lower()in["cora","citeseer","pubmed"]
-
- # Previously we use the pre-processing in pygcn (https://github.com/tkipf/pygcn)
- # for Cora, which is slightly different from the one used in the GCN paper
- ifname.lower()=="cora":
- name="cora_v2"
-
- url=_get_dgl_url(self._urls[name])
- self._reverse_edge=reverse_edge
- self._reorder=reorder
-
- super(CitationGraphDataset,self).__init__(
- name,
- url=url,
- raw_dir=raw_dir,
- force_reload=force_reload,
- verbose=verbose,
- transform=transform,
- )
-
-
[docs]defprocess(self):
-"""Loads input data from data directory and reorder graph for better locality
-
- ind.name.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
- ind.name.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
- ind.name.allx => the feature vectors of both labeled and unlabeled training instances
- (a superset of ind.name.x) as scipy.sparse.csr.csr_matrix object;
- ind.name.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
- ind.name.ty => the one-hot labels of the test instances as numpy.ndarray object;
- ind.name.ally => the labels for instances in ind.name.allx as numpy.ndarray object;
- ind.name.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict
- object;
- ind.name.test.index => the indices of test instances in graph, for the inductive setting as list object.
- """
- root=self.raw_path
- objnames=["x","y","tx","ty","allx","ally","graph"]
- objects=[]
- foriinrange(len(objnames)):
- withopen("{}/ind.{}.{}".format(root,self.name,objnames[i]),"rb")asf:
- objects.append(_pickle_load(f))
-
- x,y,tx,ty,allx,ally,graph=tuple(objects)
- test_idx_reorder=_parse_index_file(
- "{}/ind.{}.test.index".format(root,self.name)
- )
- test_idx_range=np.sort(test_idx_reorder)
-
- ifself.name=="citeseer":
- # Fix CiteSeer dataset (there are some isolated nodes in the graph)
- # Find isolated nodes, add them as zero-vecs into the right position
- test_idx_range_full=range(
- min(test_idx_reorder),max(test_idx_reorder)+1
- )
- tx_extended=sp.lil_matrix((len(test_idx_range_full),x.shape[1]))
- tx_extended[test_idx_range-min(test_idx_range),:]=tx
- tx=tx_extended
- ty_extended=np.zeros((len(test_idx_range_full),y.shape[1]))
- ty_extended[test_idx_range-min(test_idx_range),:]=ty
- ty=ty_extended
-
- features=sp.vstack((allx,tx)).tolil()
- features[test_idx_reorder,:]=features[test_idx_range,:]
-
- ifself.reverse_edge:
- g=eg.DiGraph(eg.from_dict_of_lists(graph))
- # g = from_networkx(graph)
- else:
- graph=eg.Graph(eg.from_dict_of_lists(graph))
- # edges = list(graph.edges())
- # u, v = map(list, zip(*edges))
- # g = dgl_graph((u, v))
-
- onehot_labels=np.vstack((ally,ty))
- onehot_labels[test_idx_reorder,:]=onehot_labels[test_idx_range,:]
- labels=np.argmax(onehot_labels,1)
-
- idx_test=test_idx_range.tolist()
- idx_train=range(len(y))
- idx_val=range(len(y),len(y)+500)
-
- train_mask=generate_mask_tensor(_sample_mask(idx_train,labels.shape[0]))
- val_mask=generate_mask_tensor(_sample_mask(idx_val,labels.shape[0]))
- test_mask=generate_mask_tensor(_sample_mask(idx_test,labels.shape[0]))
-
- g.ndata["train_mask"]=train_mask
- g.ndata["val_mask"]=val_mask
- g.ndata["test_mask"]=test_mask
- g.ndata["label"]=tensor(labels)
- g.ndata["feat"]=tensor(
- _preprocess_features(features),dtype=data_type_dict()["float32"]
- )
- self._num_classes=onehot_labels.shape[1]
- self._labels=labels
- # if self._reorder:
- # self._g = reorder_graph(
- # g, node_permute_algo='rcmk', edge_permute_algo='dst', store_ids=False)
- # else:
- self._g=g
-
- ifself.verbose:
- print("Finished data loading and preprocessing.")
- print(" NumNodes: {}".format(self._g.number_of_nodes()))
- print(" NumEdges: {}".format(self._g.number_of_edges()))
- print(" NumFeats: {}".format(self._g.ndata["feat"].shape[1]))
- print(" NumClasses: {}".format(self.num_classes))
- print(
- " NumTrainingSamples: {}".format(
- nonzero_1d(self._g.ndata["train_mask"]).shape[0]
- )
- )
- print(
- " NumValidationSamples: {}".format(
- nonzero_1d(self._g.ndata["val_mask"]).shape[0]
- )
- )
- print(
- " NumTestSamples: {}".format(
- nonzero_1d(self._g.ndata["test_mask"]).shape[0]
- )
- )
[docs]classCoraGraphDataset(CitationGraphDataset):
-r"""Cora citation network dataset.
-
- Nodes mean paper and edges mean citation
- relationships. Each node has a predefined
- feature with 1433 dimensions. The dataset is
- designed for the node classification task.
- The task is to predict the category of
- certain paper.
-
- Statistics:
-
- - Nodes: 2708
- - Edges: 10556
- - Number of Classes: 7
- - Label split:
-
- - Train: 140
- - Valid: 500
- - Test: 1000
-
- Parameters
- ----------
- raw_dir : str
- Raw file directory to download/contains the input data directory.
- Default: ~/.dgl/
- force_reload : bool
- Whether to reload the dataset. Default: False
- verbose : bool
- Whether to print out progress information. Default: True.
- reverse_edge : bool
- Whether to add reverse edges in graph. Default: True.
- transform : callable, optional
- A transform that takes in a :class:`~dgl.DGLGraph` object and returns
- a transformed version. The :class:`~dgl.DGLGraph` object will be
- transformed before every access.
- reorder : bool
- Whether to reorder the graph using :func:`~dgl.reorder_graph`. Default: False.
-
- Attributes
- ----------
- num_classes: int
- Number of label classes
-
- Notes
- -----
- The node feature is row-normalized.
-
- Examples
- --------
- >>> dataset = CoraGraphDataset()
- >>> g = dataset[0]
- >>> num_class = dataset.num_classes
- >>>
- >>> # get node feature
- >>> feat = g.ndata['feat']
- >>>
- >>> # get data split
- >>> train_mask = g.ndata['train_mask']
- >>> val_mask = g.ndata['val_mask']
- >>> test_mask = g.ndata['test_mask']
- >>>
- >>> # get labels
- >>> label = g.ndata['label']
-
- """
-
- def__init__(
- self,
- raw_dir=None,
- force_reload=False,
- verbose=True,
- reverse_edge=True,
- transform=None,
- reorder=False,
- ):
- name="cora"
-
- super(CoraGraphDataset,self).__init__(
- name,raw_dir,force_reload,verbose,reverse_edge,transform,reorder
- )
-
- def__getitem__(self,idx):
-r"""Gets the graph object
-
- Parameters
- -----------
- idx: int
- Item index, CoraGraphDataset has only one graph object
-
- Return
- ------
- :class:`dgl.DGLGraph`
-
- graph structure, node features and labels.
-
- - ``ndata['train_mask']``: mask for training node set
- - ``ndata['val_mask']``: mask for validation node set
- - ``ndata['test_mask']``: mask for test node set
- - ``ndata['feat']``: node feature
- - ``ndata['label']``: ground truth labels
- """
- returnsuper(CoraGraphDataset,self).__getitem__(idx)
-
- def__len__(self):
-r"""The number of graphs in the dataset."""
- returnsuper(CoraGraphDataset,self).__len__()
-
-
-
[docs]classCiteseerGraphDataset(CitationGraphDataset):
-r"""Citeseer citation network dataset.
-
- Nodes mean scientific publications and edges
- mean citation relationships. Each node has a
- predefined feature with 3703 dimensions. The
- dataset is designed for the node classification
- task. The task is to predict the category of
- certain publication.
-
- Statistics:
-
- - Nodes: 3327
- - Edges: 9228
- - Number of Classes: 6
- - Label Split:
-
- - Train: 120
- - Valid: 500
- - Test: 1000
-
- Parameters
- -----------
- raw_dir : str
- Raw file directory to download/contains the input data directory.
- Default: ~/.dgl/
- force_reload : bool
- Whether to reload the dataset. Default: False
- verbose : bool
- Whether to print out progress information. Default: True.
- reverse_edge : bool
- Whether to add reverse edges in graph. Default: True.
- transform : callable, optional
- A transform that takes in a :class:`~dgl.DGLGraph` object and returns
- a transformed version. The :class:`~dgl.DGLGraph` object will be
- transformed before every access.
- reorder : bool
- Whether to reorder the graph using :func:`~dgl.reorder_graph`. Default: False.
-
- Attributes
- ----------
- num_classes: int
- Number of label classes
-
- Notes
- -----
- The node feature is row-normalized.
-
- In citeseer dataset, there are some isolated nodes in the graph.
- These isolated nodes are added as zero-vecs into the right position.
-
- Examples
- --------
- >>> dataset = CiteseerGraphDataset()
- >>> g = dataset[0]
- >>> num_class = dataset.num_classes
- >>>
- >>> # get node feature
- >>> feat = g.ndata['feat']
- >>>
- >>> # get data split
- >>> train_mask = g.ndata['train_mask']
- >>> val_mask = g.ndata['val_mask']
- >>> test_mask = g.ndata['test_mask']
- >>>
- >>> # get labels
- >>> label = g.ndata['label']
-
- """
-
- def__init__(
- self,
- raw_dir=None,
- force_reload=False,
- verbose=True,
- reverse_edge=True,
- transform=None,
- reorder=False,
- ):
- name="citeseer"
-
- super(CiteseerGraphDataset,self).__init__(
- name,raw_dir,force_reload,verbose,reverse_edge,transform,reorder
- )
-
- def__getitem__(self,idx):
-r"""Gets the graph object
-
- Parameters
- -----------
- idx: int
- Item index, CiteseerGraphDataset has only one graph object
-
- Return
- ------
- :class:`dgl.DGLGraph`
-
- graph structure, node features and labels.
-
- - ``ndata['train_mask']``: mask for training node set
- - ``ndata['val_mask']``: mask for validation node set
- - ``ndata['test_mask']``: mask for test node set
- - ``ndata['feat']``: node feature
- - ``ndata['label']``: ground truth labels
- """
- returnsuper(CiteseerGraphDataset,self).__getitem__(idx)
-
- def__len__(self):
-r"""The number of graphs in the dataset."""
- returnsuper(CiteseerGraphDataset,self).__len__()
-
-
-
[docs]classPubmedGraphDataset(CitationGraphDataset):
-r"""Pubmed citation network dataset.
-
- Nodes mean scientific publications and edges
- mean citation relationships. Each node has a
- predefined feature with 500 dimensions. The
- dataset is designed for the node classification
- task. The task is to predict the category of
- certain publication.
-
- Statistics:
-
- - Nodes: 19717
- - Edges: 88651
- - Number of Classes: 3
- - Label Split:
-
- - Train: 60
- - Valid: 500
- - Test: 1000
-
- Parameters
- -----------
- raw_dir : str
- Raw file directory to download/contains the input data directory.
- Default: ~/.dgl/
- force_reload : bool
- Whether to reload the dataset. Default: False
- verbose : bool
- Whether to print out progress information. Default: True.
- reverse_edge : bool
- Whether to add reverse edges in graph. Default: True.
- transform : callable, optional
- A transform that takes in a :class:`~dgl.DGLGraph` object and returns
- a transformed version. The :class:`~dgl.DGLGraph` object will be
- transformed before every access.
- reorder : bool
- Whether to reorder the graph using :func:`~dgl.reorder_graph`. Default: False.
-
- Attributes
- ----------
- num_classes: int
- Number of label classes
-
- Notes
- -----
- The node feature is row-normalized.
-
- Examples
- --------
- >>> dataset = PubmedGraphDataset()
- >>> g = dataset[0]
- >>> num_class = dataset.num_of_class
- >>>
- >>> # get node feature
- >>> feat = g.ndata['feat']
- >>>
- >>> # get data split
- >>> train_mask = g.ndata['train_mask']
- >>> val_mask = g.ndata['val_mask']
- >>> test_mask = g.ndata['test_mask']
- >>>
- >>> # get labels
- >>> label = g.ndata['label']
-
- """
-
- def__init__(
- self,
- raw_dir=None,
- force_reload=False,
- verbose=True,
- reverse_edge=True,
- transform=None,
- reorder=False,
- ):
- name="pubmed"
-
- super(PubmedGraphDataset,self).__init__(
- name,raw_dir,force_reload,verbose,reverse_edge,transform,reorder
- )
-
- def__getitem__(self,idx):
-r"""Gets the graph object
-
- Parameters
- -----------
- idx: int
- Item index, PubmedGraphDataset has only one graph object
-
- Return
- ------
- :class:`dgl.DGLGraph`
-
- graph structure, node features and labels.
-
- - ``ndata['train_mask']``: mask for training node set
- - ``ndata['val_mask']``: mask for validation node set
- - ``ndata['test_mask']``: mask for test node set
- - ``ndata['feat']``: node feature
- - ``ndata['label']``: ground truth labels
- """
- returnsuper(PubmedGraphDataset,self).__getitem__(idx)
-
- def__len__(self):
-r"""The number of graphs in the dataset."""
- returnsuper(PubmedGraphDataset,self).__len__()
-
-
-
[docs]defload_cora(
- raw_dir=None,force_reload=False,verbose=True,reverse_edge=True,transform=None
-):
-"""Get CoraGraphDataset
-
- Parameters
- -----------
- raw_dir : str
- Raw file directory to download/contains the input data directory.
- Default: ~/.dgl/
- force_reload : bool
- Whether to reload the dataset. Default: False
- verbose : bool
- Whether to print out progress information. Default: True.
- reverse_edge : bool
- Whether to add reverse edges in graph. Default: True.
- transform : callable, optional
- A transform that takes in a :class:`~dgl.DGLGraph` object and returns
- a transformed version. The :class:`~dgl.DGLGraph` object will be
- transformed before every access.
-
- Return
- -------
- CoraGraphDataset
- """
- data=CoraGraphDataset(raw_dir,force_reload,verbose,reverse_edge,transform)
- returndata
-
-
-
[docs]defload_citeseer(
- raw_dir=None,force_reload=False,verbose=True,reverse_edge=True,transform=None
-):
-"""Get CiteseerGraphDataset
-
- Parameters
- -----------
- raw_dir : str
- Raw file directory to download/contains the input data directory.
- Default: ~/.dgl/
- force_reload : bool
- Whether to reload the dataset. Default: False
- verbose : bool
- Whether to print out progress information. Default: True.
- reverse_edge : bool
- Whether to add reverse edges in graph. Default: True.
- transform : callable, optional
- A transform that takes in a :class:`~dgl.DGLGraph` object and returns
- a transformed version. The :class:`~dgl.DGLGraph` object will be
- transformed before every access.
-
- Return
- -------
- CiteseerGraphDataset
- """
- data=CiteseerGraphDataset(raw_dir,force_reload,verbose,reverse_edge,transform)
- returndata
-
-
-
[docs]defload_pubmed(
- raw_dir=None,force_reload=False,verbose=True,reverse_edge=True,transform=None
-):
-"""Get PubmedGraphDataset
-
- Parameters
- -----------
- raw_dir : str
- Raw file directory to download/contains the input data directory.
- Default: ~/.dgl/
- force_reload : bool
- Whether to reload the dataset. Default: False
- verbose : bool
- Whether to print out progress information. Default: True.
- reverse_edge : bool
- Whether to add reverse edges in graph. Default: True.
- transform : callable, optional
- A transform that takes in a :class:`~dgl.DGLGraph` object and returns
- a transformed version. The :class:`~dgl.DGLGraph` object will be
- transformed before every access.
-
- Return
- -------
- PubmedGraphDataset
- """
- data=PubmedGraphDataset(raw_dir,force_reload,verbose,reverse_edge,transform)
- returndata
-
-
-
[docs]classCoraBinary(EasyGraphBuiltinDataset):
-"""A mini-dataset for binary classification task using Cora.
-
- After loaded, it has following members:
-
- graphs : list of :class:`~dgl.DGLGraph`
- pmpds : list of :class:`scipy.sparse.coo_matrix`
- labels : list of :class:`numpy.ndarray`
-
- Parameters
- -----------
- raw_dir : str
- Raw file directory to download/contains the input data directory.
- Default: ~/.dgl/
- force_reload : bool
- Whether to reload the dataset. Default: False
- verbose: bool
- Whether to print out progress information. Default: True.
- transform : callable, optional
- A transform that takes in a :class:`~dgl.DGLGraph` object and returns
- a transformed version. The :class:`~dgl.DGLGraph` object will be
- transformed before every access.
- """
-
- def__init__(self,raw_dir=None,force_reload=False,verbose=True,transform=None):
- name="cora_binary"
- url=_get_dgl_url("dataset/cora_binary.zip")
- super(CoraBinary,self).__init__(
- name,
- url=url,
- raw_dir=raw_dir,
- force_reload=force_reload,
- verbose=verbose,
- transform=transform,
- )
-
-
[docs]defprocess(self):
-"""Loads input data from data directory and transfer to target graph for better analysis
- """
-
- self._g,edge_feature_list=dict_to_hypergraph(self.load_data,is_dynamic=True)
-
- self._g.ndata["hyperedge_feature"]=tensor(
- range(1,len(edge_feature_list)+1)
- )
[docs]defprocess(self):
-"""Loads input data from data directory and transfer to target graph for better analysis
- """
- self._g,edge_feature_list=dict_to_hypergraph(self.load_data,is_dynamic=True)
- self._g.ndata["hyperedge_feature"]=tensor(
- range(1,len(edge_feature_list)+1)
- )
[docs]defprocess(self):
-"""Loads input data from data directory and transfer to target graph for better analysis
- """
-
- self._g,edge_feature_list=self.preprocess(self.load_data,is_dynamic=True)
- self._g.ndata["hyperedge_feature"]=tensor(
- range(1,len(edge_feature_list)+1)
- )
[docs]classAmazonCoBuyComputerDataset(GNNBenchmarkDataset):
-r"""'Computer' part of the AmazonCoBuy dataset for node classification task.
-
- Amazon Computers and Amazon Photo are segments of the Amazon co-purchase graph [McAuley et al., 2015],
- where nodes represent goods, edges indicate that two goods are frequently bought together, node
- features are bag-of-words encoded product reviews, and class labels are given by the product category.
-
- Reference: `<https://github.com/shchur/gnn-benchmark#datasets>`_
-
- Statistics:
-
- - Nodes: 13,752
- - Edges: 491,722 (note that the original dataset has 245,778 edges but DGL adds
- the reverse edges and remove the duplicates, hence with a different number)
- - Number of classes: 10
- - Node feature size: 767
-
- Parameters
- ----------
- raw_dir : str
- Raw file directory to download/contains the input data directory.
- Default: ~/.dgl/
- force_reload : bool
- Whether to reload the dataset. Default: False
- verbose : bool
- Whether to print out progress information. Default: True.
- transform : callable, optional
- A transform that takes in a :class:`~dgl.DGLGraph` object and returns
- a transformed version. The :class:`~dgl.DGLGraph` object will be
- transformed before every access.
-
- Attributes
- ----------
- num_classes : int
- Number of classes for each node.
-
- Examples
- --------
- >>> data = AmazonCoBuyComputerDataset()
- >>> g = data[0]
- >>> num_class = data.num_classes
- >>> feat = g.ndata['feat'] # get node feature
- >>> label = g.ndata['label'] # get node labels
- """
-
- def__init__(self,raw_dir=None,force_reload=False,verbose=True,transform=None):
- super(AmazonCoBuyComputerDataset,self).__init__(
- name="amazon_co_buy_computer",
- raw_dir=raw_dir,
- force_reload=force_reload,
- verbose=verbose,
- transform=transform,
- )
-
- @property
- defnum_classes(self):
-"""Number of classes.
-
- Return
- -------
- int
- """
- return10
[docs]classEasyGraphDataset(object):
-r"""The basic EasyGraph dataset for creating graph datasets.
- This class defines a basic template class for EasyGraph Dataset.
- The following steps will be executed automatically:
-
- 1. Check whether there is a dataset cache on disk
- (already processed and stored on the disk) by
- invoking ``has_cache()``. If true, goto 5.
- 2. Call ``download()`` to download the data if ``url`` is not None.
- 3. Call ``process()`` to process the data.
- 4. Call ``save()`` to save the processed dataset on disk and goto 6.
- 5. Call ``load()`` to load the processed dataset from disk.
- 6. Done.
-
- Users can overwrite these functions with their
- own data processing logic.
-
- Parameters
- ----------
- name : str
- Name of the dataset
- url : str
- Url to download the raw dataset. Default: None
- raw_dir : str
- Specifying the directory that will store the
- downloaded data or the directory that
- already stores the input data.
- Default: ~/.EasyGraphData/
- save_dir : str
- Directory to save the processed dataset.
- Default: same as raw_dir
- hash_key : tuple
- A tuple of values as the input for the hash function.
- Users can distinguish instances (and their caches on the disk)
- from the same dataset class by comparing the hash values.
- Default: (), the corresponding hash value is ``'f9065fa7'``.
- force_reload : bool
- Whether to reload the dataset. Default: False
- verbose : bool
- Whether to print out progress information
- transform : callable, optional
- A transform that takes in a :class:`~dgl.DGLGraph` object and returns
- a transformed version. The :class:`~dgl.DGLGraph` object will be
- transformed before every access.
-
- """
-
- def__init__(
- self,
- name,
- url=None,
- raw_dir=None,
- save_dir=None,
- hash_key=(),
- force_reload=False,
- verbose=False,
- transform=None,
- ):
- self._name=name
- self._url=url
- self._force_reload=force_reload
- self._verbose=verbose
- self._hash_key=hash_key
- self._hash=self._get_hash()
- self._transform=transform
-
- # if no dir is provided, the default EasyGraph download dir is used.
- ifraw_dirisNone:
- self._raw_dir=get_download_dir()
- else:
- self._raw_dir=raw_dir
-
- ifsave_dirisNone:
- self._save_dir=self._raw_dir
- else:
- self._save_dir=save_dir
- self._load()
-
-
[docs]defdownload(self):
-r"""Overwrite to realize your own logic of downloading data.
-
- It is recommended to download the to the :obj:`self.raw_dir`
- folder. Can be ignored if the dataset is
- already in :obj:`self.raw_dir`.
- """
- pass
-
-
[docs]defsave(self):
-r"""Overwrite to realize your own logic of
- saving the processed dataset into files.
-
- It is recommended to use ``dgl.data.utils.save_graphs``
- to save dgl graph into files and use
- ``dgl.data.utils.save_info`` to save extra
- information into files.
- """
- pass
-
-
[docs]defload(self):
-r"""Overwrite to realize your own logic of
- loading the saved dataset from files.
-
- It is recommended to use ``dgl.data.utils.load_graphs``
- to load dgl graph from files and use
- ``dgl.data.utils.load_info`` to load extra information
- into python dict object.
- """
- pass
-
-
[docs]@abc.abstractmethod
- defprocess(self):
-r"""Overwrite to realize your own logic of processing the input data."""
- pass
-
-
[docs]defhas_cache(self):
-r"""Overwrite to realize your own logic of
- deciding whether there exists a cached dataset.
-
- By default False.
- """
- returnFalse
-
- @retry_method_with_fix(download)
- def_download(self):
-"""Download dataset by calling ``self.download()``
- if the dataset does not exists under ``self.raw_path``.
-
- By default ``self.raw_path = os.path.join(self.raw_dir, self.name)``
- One can overwrite ``raw_path()`` function to change the path.
- """
-
- ifos.path.exists(self.raw_path):# pragma: no cover
- return
-
- makedirs(self.raw_dir)
- self.download()
-
- def_load(self):
-"""Entry point from __init__ to load the dataset.
-
- If cache exists:
-
- - Load the dataset from saved dgl graph and information files.
- - If loading process fails, re-download and process the dataset.
-
- else:
-
- - Download the dataset if needed.
- - Process the dataset and build the dgl graph.
- - Save the processed dataset into files.
- """
-
- load_flag=notself._force_reloadandself.has_cache()
- ifload_flag:
- try:
- self.load()
- self.process()
- ifself.verbose:
- print("Done loading data from cached files.")
- exceptKeyboardInterrupt:
- raise
- except:
- load_flag=False
- ifself.verbose:
- print(traceback.format_exc())
- print("Loading from cache failed, re-processing.")
-
- ifnotload_flag:
- self._download()
- self.process()
- self.save()
- ifself.verbose:
- print("Done saving data into cached files.")
-
- def_get_hash(self):
-"""Compute the hash of the input tuple
-
- Example
- -------
- Assume `self._hash_key = (10, False, True)`
-
- >>> hash_value = self._get_hash()
- >>> hash_value
- 'a770b222'
- """
- hash_func=hashlib.sha1()
- hash_func.update(str(self._hash_key).encode("utf-8"))
- returnhash_func.hexdigest()[:8]
-
- @property
- defurl(self):
-r"""Get url to download the raw dataset."""
- returnself._url
-
- @property
- defname(self):
-r"""Name of the dataset."""
- returnself._name
-
- @property
- defraw_dir(self):
-r"""Raw file directory contains the input data folder."""
- returnself._raw_dir
-
- @property
- defraw_path(self):
-r"""Directory contains the input data files.
- By default raw_path = os.path.join(self.raw_dir, self.name)
- """
- returnos.path.join(self.raw_dir,self.name)
-
- @property
- defsave_dir(self):
-r"""Directory to save the processed dataset."""
- returnself._save_dir
-
- @property
- defsave_path(self):
-r"""Path to save the processed dataset."""
- returnos.path.join(self._save_dir)
-
- @property
- defverbose(self):
-r"""Whether to print information."""
- returnself._verbose
-
- @property
- defhash(self):
-r"""Hash value for the dataset and the setting."""
- returnself._hash
-
- @abc.abstractmethod
- def__getitem__(self,idx):
-r"""Gets the data object at index."""
- pass
-
- @abc.abstractmethod
- def__len__(self):
-r"""The number of examples in the dataset."""
- pass
-
- def__repr__(self):
- returnf'Dataset("{self.name}"'+f" save_path={self.save_path})"
-
-
-
[docs]classEasyGraphBuiltinDataset(EasyGraphDataset):
-r"""The Basic EasyGraph Builtin Dataset.
-
- Parameters
- ----------
- name : str
- Name of the dataset.
- url : str
- Url to download the raw dataset.
- raw_dir : str
- Specifying the directory that will store the
- downloaded data or the directory that
- already stores the input data.
- Default: ~/.dgl/
- hash_key : tuple
- A tuple of values as the input for the hash function.
- Users can distinguish instances (and their caches on the disk)
- from the same dataset class by comparing the hash values.
- force_reload : bool
- Whether to reload the dataset. Default: False
- verbose : bool
- Whether to print out progress information. Default: False
- transform : callable, optional
- A transform that takes in a :class:`~dgl.DGLGraph` object and returns
- a transformed version. The :class:`~dgl.DGLGraph` object will be
- transformed before every access.
- """
-
- def__init__(
- self,
- name,
- url,
- raw_dir=None,
- hash_key=(),
- force_reload=False,
- verbose=True,
- transform=None,
- save_dir=None,
- ):
- super(EasyGraphBuiltinDataset,self).__init__(
- name,
- url=url,
- raw_dir=raw_dir,
- save_dir=save_dir,
- hash_key=hash_key,
- force_reload=force_reload,
- verbose=verbose,
- transform=transform,
- )
-
-
[docs]defdownload(self):
-r"""Automatically download data and extract it."""
- ifself.urlisnotNone:
- zip_file_path=os.path.join(self.raw_dir,self.name+".zip")
- download(self.url,path=zip_file_path)
- extract_archive(zip_file_path,self.raw_path)
[docs]defrequest_text_from_url(url):
-"""
- Requests text data from the specified URL.
-
- Args:
- url (str): The URL from which to request the text data.
-
- Returns:
- str: The text content of the response if the request is successful.
-
- Raises:
- EasyGraphError: If a connection error occurs during the request or if the HTTP response status code
- indicates a failure.
- """
- try:
- r=requests.get(url)
- exceptrequests.ConnectionError:
- raiseEasyGraphError("Connection Error!")
-
- ifr.ok:
- returnr.text
- else:
- raiseEasyGraphError(f"Error: HTTP response {r.status_code}")
-
-
-
[docs]classHouse_Committees:
-"""
- A class for loading and processing the House Committees hypergraph dataset.
-
- This class fetches hyperedge, node label, node name, and label name data from predefined URLs,
- processes the data, and generates a hypergraph representation. It also provides access to various
- dataset attributes through properties and indexing.
-
- Attributes:
- data_root (str): The root URL for the data. If `data_root` is provided during initialization,
- it is set to "https://"; otherwise, it is `None`.
- hyperedges_path (str): The URL of the file containing hyperedge information.
- node_labels_path (str): The URL of the file containing node label information.
- node_names_path (str): The URL of the file containing node name information.
- label_names_path (str): The URL of the file containing label name information.
- _hyperedges (list): A list of tuples representing hyperedges.
- _node_labels (list): A list of node labels.
- _label_names (list): A list of label names.
- _node_names (list): A list of node names.
- _content (dict): A dictionary containing dataset statistics and data, including the number of
- classes, vertices, edges, the edge list, and node labels.
- """
-
- def__init__(self,data_root=None):
-"""
- Initializes a new instance of the `House_Committees` class.
-
- Args:
- data_root (str, optional): The root URL for the data. If provided, it is set to "https://";
- otherwise, it is `None`. Defaults to `None`.
- """
- self.data_root="https://"ifdata_rootisnotNoneelsedata_root
- self.hyperedges_path="https://gitlab.com/easy-graph/easygraph-data-house-committees/-/raw/main/hyperedges-house-committees.txt?inline=false"
- self.node_labels_path="https://gitlab.com/easy-graph/easygraph-data-house-committees/-/raw/main/node-labels-house-committees.txt?ref_type=heads&inline=false"
- self.node_names_path="https://gitlab.com/easy-graph/easygraph-data-house-committees/-/raw/main/node-names-house-committees.txt?ref_type=heads&inline=false"
- self.label_names_path="https://gitlab.com/easy-graph/easygraph-data-house-committees/-/raw/main/label-names-house-committees.txt?ref_type=heads&inline=false"
- self._hyperedges=[]
- self._node_labels=[]
- self._label_names=[]
- self._node_names=[]
- self.generate_hypergraph(
- hyperedges_path=self.hyperedges_path,
- node_labels_path=self.node_labels_path,
- node_names_path=self.node_names_path,
- label_names_path=self.label_names_path,
- )
-
- self._content={
- "num_classes":len(self._label_names),
- "num_vertices":len(self._node_labels),
- "num_edges":len(self._hyperedges),
- "edge_list":self._hyperedges,
- "labels":self._node_labels,
- }
-
-
[docs]defprocess_label_txt(self,data_str,delimiter="\n",transform_fun=str):
-"""
- Processes a string containing label data into a list of transformed values.
-
- Args:
- data_str (str): The input string containing label data.
- delimiter (str, optional): The delimiter used to split the input string. Defaults to "\n".
- transform_fun (callable, optional): A function used to transform each label value.
- Defaults to the `str` function.
-
- Returns:
- list: A list of transformed label values.
- """
- data_str=data_str.strip()
- data_lst=data_str.split(delimiter)
- final_lst=[]
- fordataindata_lst:
- data=data.strip()
- data=transform_fun(data)
- final_lst.append(data)
- returnfinal_lst
-
- def__getitem__(self,key:str):
-"""
- Retrieves a value from the `_content` dictionary using the specified key.
-
- Args:
- key (str): The key used to access the `_content` dictionary.
-
- Returns:
- Any: The value corresponding to the key in the `_content` dictionary.
- """
- returnself._content[key]
-
- @property
- defnode_labels(self):
-"""
- Gets the list of node labels.
-
- Returns:
- list: A list of node labels.
- """
- returnself._node_labels
-
- @property
- defnode_names(self):
-"""
- Gets the list of node names.
-
- Returns:
- list: A list of node names.
- """
- returnself._node_names
-
- @property
- deflabel_names(self):
-"""
- Gets the list of label names.
-
- Returns:
- list: A list of label names.
- """
- returnself._label_names
-
- @property
- defhyperedges(self):
-"""
- Gets the list of hyperedges.
-
- Returns:
- list: A list of tuples representing hyperedges.
- """
- returnself._hyperedges
-
-
[docs]defgenerate_hypergraph(
- self,
- hyperedges_path=None,
- node_labels_path=None,
- node_names_path=None,
- label_names_path=None,
- ):
-"""
- Generates a hypergraph by fetching and processing data from the specified URLs.
-
- Args:
- hyperedges_path (str, optional): The URL of the file containing hyperedge information.
- Defaults to `None`.
- node_labels_path (str, optional): The URL of the file containing node label information.
- Defaults to `None`.
- node_names_path (str, optional): The URL of the file containing node name information.
- Defaults to `None`.
- label_names_path (str, optional): The URL of the file containing label name information.
- Defaults to `None`.
- """
-
- deffun(data):
-"""
- Converts a string to an integer and subtracts 1.
-
- Args:
- data (str): The input string to be converted.
-
- Returns:
- int: The converted integer value minus 1.
- """
- data=int(data)-1
- returndata
-
- hyperedges_info=request_text_from_url(hyperedges_path)
- hyperedges_info=hyperedges_info.strip()
- hyperedges_lst=hyperedges_info.split("\n")
- forhyperedgeinhyperedges_lst:
- hyperedge=hyperedge.strip()
- hyperedge=[int(i)-1foriinhyperedge.split(",")]
- self._hyperedges.append(tuple(hyperedge))
- # print(self.hyperedges)
-
- node_labels_info=request_text_from_url(node_labels_path)
-
- process_node_labels_info=self.process_label_txt(
- node_labels_info,transform_fun=fun
- )
- self._node_labels=process_node_labels_info
- # print("process_node_labels_info:", process_node_labels_info)
- node_names_info=request_text_from_url(node_names_path)
- process_node_names_info=self.process_label_txt(node_names_info)
- self._node_names=process_node_names_info
- # print("process_node_names_info:", process_node_names_info)
- label_names_info=request_text_from_url(label_names_path)
- process_label_names_info=self.process_label_txt(label_names_info)
- self._label_names=process_label_names_info
[docs]defrequest_text_from_url(url):
-"""Requests text data from the specified URL.
-
- Args:
- url (str): The URL from which to request data.
-
- Returns:
- str: The text content of the response if the request is successful.
-
- Raises:
- EasyGraphError: If a connection error occurs or the HTTP response status code indicates failure.
- """
- try:
- r=requests.get(url)
- exceptrequests.ConnectionError:
- raiseEasyGraphError("Connection Error!")
-
- ifr.ok:
- returnr.text
- else:
- raiseEasyGraphError(f"Error: HTTP response {r.status_code}")
-
-
-
[docs]classcontact_primary_school:
-"""A class for loading and processing the primary school contact network hypergraph dataset.
-
- This class loads hyperedge, node label, and label name data from specified URLs and generates a hypergraph.
-
- Attributes:
- data_root (str): The root URL for the data. If not provided, it is set to None.
- hyperedges_path (str): The URL for the hyperedge data.
- node_labels_path (str): The URL for the node label data.
- label_names_path (str): The URL for the label name data.
- _hyperedges (list): A list storing hyperedges.
- _node_labels (list): A list storing node labels.
- _label_names (list): A list storing label names.
- _node_names (list): A list storing node names (currently unused).
- _content (dict): A dictionary containing dataset statistics and data.
- """
-
- def__init__(self,data_root=None):
-"""Initializes an instance of the contact_primary_school class.
-
- Args:
- data_root (str, optional): The root URL for the data. Defaults to None.
- """
- self.data_root="https://"ifdata_rootisnotNoneelsedata_root
- self.hyperedges_path="https://gitlab.com/easy-graph/easygraph-data-contact-primary-school/-/raw/main/hyperedges-contact-primary-school.txt?inline=false"
- self.node_labels_path="https://gitlab.com/easy-graph/easygraph-data-contact-primary-school/-/raw/main/node-labels-contact-primary-school.txt?ref_type=heads&inline=false"
- # self.node_names_path = "https://gitlab.com/easy-graph/easygraph-data-house-committees/-/raw/main/node-names-house-committees.txt?ref_type=heads&inline=false"
- self.label_names_path="https://gitlab.com/easy-graph/easygraph-data-contact-primary-school/-/raw/main/label-names-contact-primary-school.txt?ref_type=heads&inline=false"
- self._hyperedges=[]
- self._node_labels=[]
- self._label_names=[]
- self._node_names=[]
- self.generate_hypergraph(
- hyperedges_path=self.hyperedges_path,
- node_labels_path=self.node_labels_path,
- # node_names_path=self.node_names_path,
- label_names_path=self.label_names_path,
- )
- self._content={
- "num_classes":len(self._label_names),
- "num_vertices":len(self._node_labels),
- "num_edges":len(self._hyperedges),
- "edge_list":self._hyperedges,
- "labels":self._node_labels,
- }
-
- def__getitem__(self,key:str):
-"""Accesses data in the _content dictionary by key.
-
- Args:
- key (str): The key of the data to access.
-
- Returns:
- Any: The value corresponding to the key in the _content dictionary.
- """
- returnself._content[key]
-
-
[docs]defprocess_label_txt(self,data_str,delimiter="\n",transform_fun=str):
-"""Processes label data read from a text file.
-
- Args:
- data_str (str): A string containing label data.
- delimiter (str, optional): The delimiter used to split the string. Defaults to "\n".
- transform_fun (callable, optional): A function used to transform each label. Defaults to str.
-
- Returns:
- list: A list of processed labels.
- """
- data_str=data_str.strip()
- data_lst=data_str.split(delimiter)
- final_lst=[]
- fordataindata_lst:
- data=data.strip()
- data=transform_fun(data)
- final_lst.append(data)
- returnfinal_lst
-
- @property
- defnode_labels(self):
-"""Gets the list of node labels.
-
- Returns:
- list: A list of node labels.
- """
- returnself._node_labels
-
-"""
- @property
- def node_names(self):
- return self._node_names
- """
-
- @property
- deflabel_names(self):
-"""Gets the list of label names.
-
- Returns:
- list: A list of label names.
- """
- returnself._label_names
-
- @property
- defhyperedges(self):
-"""Gets the list of hyperedges.
-
- Returns:
- list: A list of hyperedges.
- """
- returnself._hyperedges
-
-
[docs]defgenerate_hypergraph(
- self,
- hyperedges_path=None,
- node_labels_path=None,
- # node_names_path=None,
- label_names_path=None,
- ):
-"""Generates hypergraph data from specified URLs.
-
- Args:
- hyperedges_path (str, optional): The URL for the hyperedge data. Defaults to None.
- node_labels_path (str, optional): The URL for the node label data. Defaults to None.
- label_names_path (str, optional): The URL for the label name data. Defaults to None.
- """
-
- deffun(data):
-"""Converts the input data to an integer and subtracts 1.
-
- Args:
- data (str): The input string data.
-
- Returns:
- int: The converted integer data.
- """
- data=int(data)-1
- returndata
-
- hyperedges_info=request_text_from_url(hyperedges_path)
- hyperedges_info=hyperedges_info.strip()
- hyperedges_lst=hyperedges_info.split("\n")
- forhyperedgeinhyperedges_lst:
- hyperedge=hyperedge.strip()
- hyperedge=[int(i)-1foriinhyperedge.split(",")]
- self._hyperedges.append(tuple(hyperedge))
- # print(self.hyperedges)
-
- node_labels_info=request_text_from_url(node_labels_path)
-
- process_node_labels_info=self.process_label_txt(
- node_labels_info,transform_fun=fun
- )
- self._node_labels=process_node_labels_info
- label_names_info=request_text_from_url(label_names_path)
- process_label_names_info=self.process_label_txt(label_names_info)
- self._label_names=process_label_names_info
[docs]classCooking200(BaseData):
-r"""The Cooking 200 dataset is collected from `Yummly.com <https://www.yummly.com/>`_ for vertex classification task.
- It is a hypergraph dataset, in which vertex denotes the dish and hyperedge denotes
- the ingredient. Each dish is also associated with category information, which indicates the dish's cuisine like
- Chinese, Japanese, French, and Russian.
-
- The content of the Cooking200 dataset includes the following:
-
- - ``num_classes``: The number of classes: :math:`20`.
- - ``num_vertices``: The number of vertices: :math:`7,403`.
- - ``num_edges``: The number of edges: :math:`2,755`.
- - ``edge_list``: The edge list. ``List`` with length :math:`(2,755)`.
- - ``labels``: The label list. ``torch.LongTensor`` with size :math:`(7,403)`.
- - ``train_mask``: The train mask. ``torch.BoolTensor`` with size :math:`(7,403)`.
- - ``val_mask``: The validation mask. ``torch.BoolTensor`` with size :math:`(7,403)`.
- - ``test_mask``: The test mask. ``torch.BoolTensor`` with size :math:`(7,403)`.
-
- Args:
- ``data_root`` (``str``, optional): The ``data_root`` has stored the data. If set to ``None``, this function will auto-download from server and save into the default direction ``~/.dhg/datasets/``. Defaults to ``None``.
- """
-
- def__init__(self,data_root:Optional[str]=None)->None:
- super().__init__("cooking_200",data_root)
- self._content={
- "num_classes":20,
- "num_vertices":7403,
- "num_edges":2755,
- "edge_list":{
- "upon":[
- {
- "filename":"edge_list.pkl",
- "md5":"2cd32e13dd4e33576c43936542975220",
- }
- ],
- "loader":load_from_pickle,
- },
- "labels":{
- "upon":[
- {
- "filename":"labels.pkl",
- "md5":"f1f3c0399c9c28547088f44e0bfd5c81",
- }
- ],
- "loader":load_from_pickle,
- "preprocess":[to_long_tensor],
- },
- "train_mask":{
- "upon":[
- {
- "filename":"train_mask.pkl",
- "md5":"66ea36bae024aaaed289e1998fe894bd",
- }
- ],
- "loader":load_from_pickle,
- "preprocess":[to_bool_tensor],
- },
- "val_mask":{
- "upon":[
- {
- "filename":"val_mask.pkl",
- "md5":"6c0d3d8b752e3955c64788cc65dcd018",
- }
- ],
- "loader":load_from_pickle,
- "preprocess":[to_bool_tensor],
- },
- "test_mask":{
- "upon":[
- {
- "filename":"test_mask.pkl",
- "md5":"0e1564904551ba493e1f8a09d103461e",
- }
- ],
- "loader":load_from_pickle,
- "preprocess":[to_bool_tensor],
- },
- }
[docs]classBaseData:
-r"""The Base Class of all datasets.
-
- ::
-
- self._content = {
- 'item': {
- 'upon': [
- {'filename': 'part1.pkl', 'md5': 'xxxxx',},
- {'filename': 'part2.pkl', 'md5': 'xxxxx',},
- ],
- 'loader': loader_function,
- 'preprocess': [datapipe1, datapipe2],
- },
- ...
- }
-
- """
-
- def__init__(self,name:str,data_root=None):
- # configure the data local/remote root
- self.name=name
- ifdata_rootisNone:
- self.data_root=DATASETS_ROOT/name
- else:
- self.data_root=Path(data_root)/name
- self.remote_root=REMOTE_DATASETS_ROOT+name+"/"
- # init
- self._content={}
- self._raw={}
-
- def__repr__(self)->str:
- return(
- f"This is {self.name} dataset:\n"
- +"\n".join(f" -> {k}"forkinself.content)
- +"\nPlease try `data['name']` to get the specified data."
- )
-
- @property
- defcontent(self):
-r"""Return the content of the dataset."""
- returnlist(self._content.keys())
-
-
[docs]defneeds_to_load(self,item_name:str)->bool:
-r"""Return whether the ``item_name`` of the dataset needs to be loaded.
-
- Args:
- ``item_name`` (``str``): The name of the item in the dataset.
- """
- assertitem_nameinself.content,f"{item_name} is not provided in the Data"
- return(
- isinstance(self._content[item_name],dict)
- and"upon"inself._content[item_name]
- and"loader"inself._content[item_name]
- )
-
- def__getitem__(self,key:str)->Any:
- ifself.needs_to_load(key):
- cur_cfg=self._content[key]
- ifcur_cfg.get("cache",None)isNone:
- # get raw data
- item=self.raw(key)
- # preprocess and cache
- pipes=cur_cfg.get("preprocess",None)
- ifpipesisnotNone:
- cur_cfg["cache"]=compose_pipes(*pipes)(item)
- else:
- cur_cfg["cache"]=item
- returncur_cfg["cache"]
- else:
- returnself._content[key]
-
-
[docs]defraw(self,key:str)->Any:
-r"""Return the ``key`` of the dataset with un-preprocessed format."""
- ifself.needs_to_load(key):
- cur_cfg=self._content[key]
- ifself._raw.get(key,None)isNone:
- upon=cur_cfg["upon"]
- iflen(upon)==0:
- returnNone
- self.fetch_files(cur_cfg["upon"])
- file_path_list=[
- self.data_root/u["filename"]foruincur_cfg["upon"]
- ]
- iflen(file_path_list)==1:
- self._raw[key]=cur_cfg["loader"](file_path_list[0])
- else:
- # here, you should implement a multi-file loader
- self._raw[key]=cur_cfg["loader"](file_path_list)
- returnself._raw[key]
- else:
- returnself._content[key]
-
-
[docs]deffetch_files(self,files:List[Dict[str,str]]):
-r"""Download and check the files if they are not exist.
-
- Args:
- ``files`` (``List[Dict[str, str]]``): The files to download, each element
- in the list is a dict with at lease two keys: ``filename`` and ``md5``.
- If extra key ``bk_url`` is provided, it will be used to download the
- file from the backup url.
- """
- forfileinfiles:
- cur_filename=file["filename"]
- cur_url=file.get("bk_url",None)
- ifcur_urlisNone:
- cur_url=self.remote_root+cur_filename
- download_and_check(cur_url,self.data_root/cur_filename,file["md5"])
[docs]defrequest_text_from_url(url):
-"""
- Requests text content from the given URL.
-
- Args:
- url (str): The URL from which to request text data.
-
- Returns:
- str: The text content of the response if the request is successful.
-
- Raises:
- EasyGraphError: If a connection error occurs during the request or if the HTTP response status code is not OK.
- """
- try:
- r=requests.get(url)
- exceptrequests.ConnectionError:
- raiseEasyGraphError("Connection Error!")
-
- ifr.ok:
- returnr.text
- else:
- raiseEasyGraphError(f"Error: HTTP response {r.status_code}")
-
-
-
[docs]classwalmart_trips:
-"""
- A class for loading and processing the Walmart trips hypergraph dataset.
-
- This class fetches hyperedge, node label, and label name data from predefined URLs,
- processes the data, and generates a hypergraph representation. It also provides access
- to various dataset attributes through properties and indexing.
-
- Attributes:
- data_root (str): The root URL for the data. If provided during initialization, it is set to "https://";
- otherwise, it is None.
- hyperedges_path (str): The URL of the file containing hyperedge information.
- node_labels_path (str): The URL of the file containing node label information.
- label_names_path (str): The URL of the file containing label name information.
- _hyperedges (list): A list of tuples representing hyperedges.
- _node_labels (list): A list of node labels.
- _label_names (list): A list of label names.
- _node_names (list): An empty list reserved for node names (currently unused).
- _content (dict): A dictionary containing dataset statistics and data, such as the number of classes,
- vertices, edges, the edge list, and node labels.
- """
-
- def__init__(self,data_root=None,local_path=None):
-"""
- Initializes an instance of the walmart_trips class.
-
- Args:
- data_root (str, optional): The root URL for the data. If provided, it is set to "https://";
- otherwise, it is None. Defaults to None.
- local_path (str, optional): Currently unused. Defaults to None.
- """
- self.data_root="https://"ifdata_rootisnotNoneelsedata_root
- self.hyperedges_path="https://gitlab.com/easy-graph/easygraph-data-walmart-trips/-/raw/main/hyperedges-walmart-trips.txt?inline=false"
- self.node_labels_path="https://gitlab.com/easy-graph/easygraph-data-walmart-trips/-/raw/main/node-labels-walmart-trips.txt?ref_type=heads&inline=false"
- # self.node_names_path = "https://gitlab.com/easy-graph/easygraph-data-walmart-trips/-/raw/main/node-names-house-committees.txt?ref_type=heads&inline=false"
- self.label_names_path="https://gitlab.com/easy-graph/easygraph-data-walmart-trips/-/raw/main/label-names-walmart-trips.txt?ref_type=heads&inline=false"
- self._hyperedges=[]
- self._node_labels=[]
- self._label_names=[]
- self._node_names=[]
-
- self.generate_hypergraph(
- hyperedges_path=self.hyperedges_path,
- node_labels_path=self.node_labels_path,
- # node_names_path=self.node_names_path,
- label_names_path=self.label_names_path,
- )
-
- self._content={
- "num_classes":len(self._label_names),
- "num_vertices":len(self._node_labels),
- "num_edges":len(self._hyperedges),
- "edge_list":self._hyperedges,
- "labels":self._node_labels,
- }
-
- def__getitem__(self,key:str):
-"""
- Retrieves a value from the _content dictionary using the specified key.
-
- Args:
- key (str): The key used to access the _content dictionary.
-
- Returns:
- Any: The value corresponding to the key in the _content dictionary.
- """
- returnself._content[key]
-
-
[docs]defprocess_label_txt(self,data_str,delimiter="\n",transform_fun=str):
-"""
- Processes a string containing label data into a list of transformed values.
-
- Args:
- data_str (str): The input string containing label data.
- delimiter (str, optional): The delimiter used to split the input string. Defaults to "\n".
- transform_fun (callable, optional): A function used to transform each label value.
- Defaults to the str function.
-
- Returns:
- list: A list of transformed label values.
- """
- data_str=data_str.strip()
- data_lst=data_str.split(delimiter)
- final_lst=[]
- fordataindata_lst:
- data=data.strip()
- data=transform_fun(data)
- final_lst.append(data)
- returnfinal_lst
-
- @property
- defnode_labels(self):
-"""
- Gets the list of node labels.
-
- Returns:
- list: A list of node labels.
- """
- returnself._node_labels
-
-"""
- @property
- def node_names(self):
- return self._node_names
- """
-
- @property
- deflabel_names(self):
-"""
- Gets the list of label names.
-
- Returns:
- list: A list of label names.
- """
- returnself._label_names
-
- @property
- defhyperedges(self):
-"""
- Gets the list of hyperedges.
-
- Returns:
- list: A list of tuples representing hyperedges.
- """
- returnself._hyperedges
-
-
[docs]defgenerate_hypergraph(
- self,
- hyperedges_path=None,
- node_labels_path=None,
- # node_names_path=None,
- label_names_path=None,
- ):
-"""
- Generates a hypergraph by fetching and processing data from the specified URLs.
-
- Args:
- hyperedges_path (str, optional): The URL of the file containing hyperedge information.
- Defaults to None.
- node_labels_path (str, optional): The URL of the file containing node label information.
- Defaults to None.
- label_names_path (str, optional): The URL of the file containing label name information.
- Defaults to None.
- """
-
- deffun(data):
-"""
- Converts a string to an integer and subtracts 1.
-
- Args:
- data (str): The input string to be converted.
-
- Returns:
- int: The converted integer value minus 1.
- """
- data=int(data)-1
- returndata
-
- hyperedges_info=request_text_from_url(hyperedges_path)
- hyperedges_info=hyperedges_info.strip()
- hyperedges_lst=hyperedges_info.split("\n")
- forhyperedgeinhyperedges_lst:
- hyperedge=hyperedge.strip()
- hyperedge=[int(i)-1foriinhyperedge.split(",")]
- self._hyperedges.append(tuple(hyperedge))
- # print(self.hyperedges)
-
- node_labels_info=request_text_from_url(node_labels_path)
-
- process_node_labels_info=self.process_label_txt(
- node_labels_info,transform_fun=fun
- )
- self._node_labels=process_node_labels_info
- # print("process_node_labels_info:", process_node_labels_info)
- # print("process_node_names_info:", process_node_names_info)
- label_names_info=request_text_from_url(label_names_path)
- process_label_names_info=self.process_label_txt(label_names_info)
- self._label_names=process_label_names_info
[docs]classKarateClubDataset(EasyGraphDataset):
-"""Karate Club dataset for Node Classification
-
- Zachary's karate club is a social network of a university
- karate club, described in the paper "An Information Flow
- Model for Conflict and Fission in Small Groups" by Wayne W. Zachary.
- The network became a popular example of community structure in
- networks after its use by Michelle Girvan and Mark Newman in 2002.
- Official website: `<http://konect.cc/networks/ucidata-zachary/>`_
-
- Karate Club dataset statistics:
-
- - Nodes: 34
- - Edges: 156
- - Number of Classes: 2
-
- Parameters
- ----------
- transform : callable, optional
- A transform that takes in a :class:`~eg.Graph` object and returns
- a transformed version. The :class:`~eg.Graph` object will be
- transformed before every access.
-
- Attributes
- ----------
- num_classes : int
- Number of node classes
-
- Examples
- --------
- >>> dataset = KarateClubDataset()
- >>> num_classes = dataset.num_classes
- >>> g = dataset[0]
- >>> labels = g.ndata['label']
- """
-
- def__init__(self,transform=None):
- super(KarateClubDataset,self).__init__(name="karate_club",transform=transform)
-
-
-
-
-defcheck_sha1(filename,sha1_hash):
-"""Check whether the sha1 hash of the file content matches the expected hash.
-
- Codes borrowed from mxnet/gluon/utils.py
-
- Parameters
- ----------
- filename : str
- Path to the file.
- sha1_hash : str
- Expected sha1 hash in hexadecimal digits.
-
- Returns
- -------
- bool
- Whether the file content matches the expected hash.
- """
- sha1=hashlib.sha1()
- withopen(filename,"rb")asf:
- whileTrue:
- data=f.read(1048576)
- ifnotdata:
- break
- sha1.update(data)
-
- returnsha1.hexdigest()==sha1_hash
-
-
-
[docs]defgenerate_mask_tensor(mask):
-"""Generate mask tensor according to different backend
- For torch, it will create a bool tensor
- Parameters
- ----------
- mask: numpy ndarray
- input mask tensor
- """
- assertisinstance(
- mask,np.ndarray
- ),"input for generate_mask_tensor should be an numpy ndarray"
- returntensor(mask,dtype=data_type_dict()["bool"])
-
-
-defdeprecate_property(old,new):
- warnings.warn(
- "Property {} will be deprecated, please use {} instead.".format(old,new)
- )
-
-
-defcheck_file(file_path:Path,md5:str):
-r"""Check if a file is valid.
-
- Args:
- ``file_path`` (``Path``): The local path of the file.
- ``md5`` (``str``): The md5 of the file.
-
- Raises:
- FileNotFoundError: Not found the file.
- """
- ifnotfile_path.exists():
- raiseFileNotFoundError(f"{file_path} does not exist.")
- else:
- withopen(file_path,"rb")asf:
- data=f.read()
- cur_md5=hashlib.md5(data).hexdigest()
- returncur_md5==md5
-
-
-defdownload_file(url:str,file_path:Path):
-r"""Download a file from a url.
-
- Args:
- ``url`` (``str``): the url of the file
- ``file_path`` (``str``): the path to the file
- """
- file_path.parent.mkdir(parents=True,exist_ok=True)
- r=requests.get(url,stream=True,verify=True)
- ifr.status_code!=200:
- raiserequests.HTTPError(f"{url} is not accessible.")
- withopen(file_path,"wb")asf:
- forchunkinr.iter_content(chunk_size=1024):
- ifchunk:
- f.write(chunk)
-
-
-@_retry(3)
-defdownload_and_check(url:str,file_path:Path,md5:str):
-r"""Download a file from a url and check its integrity.
-
- Args:
- ``url`` (``str``): The url of the file.
- ``file_path`` (``Path``): The path to the file.
- ``md5`` (``str``): The md5 of the file.
- """
- ifnotfile_path.exists():
- download_file(url,file_path)
- ifnotcheck_file(file_path,md5):
- file_path.unlink()
- raiseValueError(
- f"{file_path} is corrupted. We will delete it, and try to download it"
- " again."
- )
- returnTrue
-
[docs]classEasyGraphException(Exception):
-"""Base class for exceptions in EasyGraph."""
-
-
-
[docs]classEasyGraphError(EasyGraphException):
-"""Exception for a serious error in EasyGraph"""
-
-
-
[docs]classEasyGraphPointlessConcept(EasyGraphException):
-"""Raised when a null graph is provided as input to an algorithm
- that cannot use it.
-
- The null graph is sometimes considered a pointless concept [1]_,
- thus the name of the exception.
-
- References
- ----------
- .. [1] Harary, F. and Read, R. "Is the Null Graph a Pointless
- Concept?" In Graphs and Combinatorics Conference, George
- Washington University. New York: Springer-Verlag, 1973.
-
- """
-
-
-
[docs]classEasyGraphAlgorithmError(EasyGraphException):
-"""Exception for unexpected termination of algorithms."""
-
-
-
[docs]classEasyGraphUnfeasible(EasyGraphAlgorithmError):
-"""Exception raised by algorithms trying to solve a problem
- instance that has no feasible solution."""
-
-
-
[docs]classEasyGraphNoPath(EasyGraphUnfeasible):
-"""Exception for algorithms that should return a path when running
- on graphs where such a path does not exist."""
-
-
-
[docs]classEasyGraphNoCycle(EasyGraphUnfeasible):
-"""Exception for algorithms that should return a cycle when running
- on graphs where such a cycle does not exist."""
-
-
-
[docs]classHasACycle(EasyGraphException):
-"""Raised if a graph has a cycle when an algorithm expects that it
- will have no cycles.
-
- """
-
-
-
[docs]classEasyGraphUnbounded(EasyGraphAlgorithmError):
-"""Exception raised by algorithms trying to solve a maximization
- or a minimization problem instance that is unbounded."""
-
-
-
[docs]classEasyGraphNotImplemented(EasyGraphException):
-"""Exception raised by algorithms not implemented for a type of graph."""
-
-
-
[docs]classNodeNotFound(EasyGraphException):
-"""Exception raised if requested node is not present in the graph"""
[docs]classBaseTask:
-r"""The base class of Auto-experiment in EasyGraph.
-
- Args:
- ``work_root`` (``Optional[Union[str, Path]]``): User's work root to store all studies.
- ``data`` (``dict``): The dictionary to store input data that used in the experiment.
- ``model_builder`` (``Callable``): The function to build a model with a fixed parameter ``trial``.
- ``train_builder`` (``Callable``): The function to build a training configuration with two fixed parameters ``trial`` and ``model``.
- ``evaluator`` (``eg.ml_metrics.BaseEvaluator``): The EasyGraph evaluator object to evaluate performance of the model in the experiment.
- ``device`` (``torch.device``): The target device to run the experiment.
- ``structure_builder`` (``Optional[Callable]``): The function to build a structure with a fixed parameter ``trial``. The structure can be ``eg.Graph``, ``eg.DiGraph``, ``eg.BiGraph``, and ``eg.Hypergraph``.
- ``study_name`` (``Optional[str]``): The name of this study. If set to ``None``, the study name will be generated automatically according to current time. Defaults to ``None``.
- ``overwrite`` (``bool``): The flag that whether to overwrite the existing study. Different studies are identified by the ``study_name``. Defaults to ``True``.
- """
-
- def__init__(
- self,
- work_root:Optional[Union[str,Path]],
- data:dict,
- model_builder:Callable,
- train_builder:Callable,
- evaluator:BaseEvaluator,
- device:torch.device,
- structure_builder:Optional[Callable]=None,
- study_name:Optional[str]=None,
- overwrite:bool=True,
- ):
- self.data=data
- self.model_builder=model_builder
- self.train_builder=train_builder
- self.structure_builder=structure_builder
- self.evaluator=evaluator
- self.device=device
- self.study=None
- ifstudy_nameisNone:
- self.study_name=time.strftime("%Y-%m-%d--%H-%M-%S",time.localtime())
- else:
- self.study_name=study_name
- work_root=Path(work_root)
- self.study_root=work_root/self.study_name
- ifoverwriteandself.study_root.exists():
- shutil.rmtree(self.study_root)
- self.log_file=self.study_root/"log.txt"
- self.cache_root=self.study_root/"cache"
- ifnotwork_root.exists():
- ifwork_root.parent.exists():
- work_root.mkdir(exist_ok=True)
- else:
- raiseValueError(f"The work_root {work_root} does not exist.")
- self.study_root.mkdir(exist_ok=True)
- self.cache_root.mkdir(exist_ok=True)
- # configure logging
- self.logger=optuna.logging.get_logger("optuna")
- self.logger.setLevel(logging.INFO)
- out_file_handler=logging.FileHandler(self.log_file,mode="a",encoding="utf8")
- out_file_handler.setFormatter(default_log_formatter())
- self.logger.addHandler(out_file_handler)
- self.logger.info(f"Logs will be saved to {self.log_file.absolute()}")
- self.logger.info(
- f"Files in training will be saved in {self.study_root.absolute()}"
- )
-
-
[docs]@torch.no_grad()
- @abc.abstractmethod
- deftest(self,data:Optional[dict]=None,model:Optional[nn.Module]=None):
-r"""Test the model.
-
- Args:
- ``data`` (``dict``, optional): The input data if set to ``None``, the specified ``data`` in the initialization of the experiments will be used. Defaults to ``None``.
- ``model`` (``nn.Module``, optional): The model if set to ``None``, the trained best model will be used. Defaults to ``None``.
- """
[docs]classHypergraphVertexClassificationTask(VertexClassificationTask):
-r"""The auto-experiment class for the vertex classification task on hypergraph.
-
- Args:
- ``work_root`` (``Optional[Union[str, Path]]``): User's work root to store all studies.
- ``data`` (``dict``): The dictionary to store input data that used in the experiment.
- ``model_builder`` (``Callable``): The function to build a model with a fixed parameter ``trial``.
- ``train_builder`` (``Callable``): The function to build a training configuration with two fixed parameters ``trial`` and ``model``.
- ``evaluator`` (``easygraph.ml_metrics.BaseEvaluator``): The DHG evaluator object to evaluate performance of the model in the experiment.
- ``device`` (``torch.device``): The target device to run the experiment.
- ``structure_builder`` (``Optional[Callable]``): The function to build a structure with a fixed parameter ``trial``. The structure should be ``easygraph.Hypergraph``.
- ``study_name`` (``Optional[str]``): The name of this study. If set to ``None``, the study name will be generated automatically according to current time. Defaults to ``None``.
- ``overwrite`` (``bool``): The flag that whether to overwrite the existing study. Different studies are identified by the ``study_name``. Defaults to ``True``.
- """
-
- def__init__(
- self,
- work_root:Optional[Union[str,Path]],
- data:dict,
- model_builder:Callable,
- train_builder:Callable,
- evaluator:BaseEvaluator,
- device:torch.device,
- structure_builder:Optional[Callable]=None,
- study_name:Optional[str]=None,
- overwrite:bool=True,
- ):
- super().__init__(
- work_root,
- data,
- model_builder,
- train_builder,
- evaluator,
- device,
- structure_builder=structure_builder,
- study_name=study_name,
- overwrite=overwrite,
- )
-
-
[docs]defto(self,device:torch.device):
-r"""Move the input data to the target device.
-
- Args:
- ``device`` (``torch.device``): The specified target device to store the input data.
- """
- returnsuper().to(device)
-
- @property
- defvars_for_DL(self):
-r"""Return a name list for available variables for deep learning in the vertex classification on hypergraph. The name list includes ``features``, ``structure``, ``labels``, ``train_mask``, ``val_mask``, and ``test_mask``.
- """
- returnsuper().vars_for_DL
-
-
[docs]defexperiment(self,trial:optuna.Trial):
-r"""Run the experiment for a given trial.
-
- Args:
- ``trial`` (``optuna.Trial``): The ``optuna.Trial`` object.
- """
- returnsuper().experiment(trial)
-
-
[docs]defrun(self,max_epoch:int,num_trials:int=1,direction:str="maximize"):
-r"""Run experiments with automatically hyper-parameter tuning.
-
- Args:
- ``max_epoch`` (``int``): The maximum number of epochs to train for each experiment.
- ``num_trials`` (``int``): The number of trials to run. Defaults to ``1``.
- ``direction`` (``str``): The direction to optimize. Defaults to ``"maximize"``.
- """
- returnsuper().run(max_epoch,num_trials,direction)
-
-
[docs]deftrain(
- self,
- data:dict,
- model:nn.Module,
- optimizer:torch.optim.Optimizer,
- criterion:nn.Module,
- ):
-r"""Train model for one epoch.
-
- Args:
- ``data`` (``dict``): The input data.
- ``model`` (``nn.Module``): The model.
- ``optimizer`` (``torch.optim.Optimizer``): The model optimizer.
- ``criterion`` (``nn.Module``): The loss function.
- """
- returnsuper().train(data,model,optimizer,criterion)
-
-
[docs]@torch.no_grad()
- defvalidate(self,data:dict,model:nn.Module):
-r"""Validate the model.
-
- Args:
- ``data`` (``dict``): The input data.
- ``model`` (``nn.Module``): The model.
- """
- returnsuper().validate(data,model)
-
-
[docs]@torch.no_grad()
- deftest(self,data:Optional[dict]=None,model:Optional[nn.Module]=None):
-r"""Test the model.
-
- Args:
- ``data`` (``dict``, optional): The input data if set to ``None``, the specified ``data`` in the intialization of the experiments will be used. Defaults to ``None``.
- ``model`` (``nn.Module``, optional): The model if set to ``None``, the trained best model will be used. Defaults to ``None``.
- """
- returnsuper().test(data,model)
[docs]classVertexClassificationTask(BaseTask):
-r"""The auto-experiment class for the vertex classification task.
-
- Args:
- ``work_root`` (``Optional[Union[str, Path]]``): User's work root to store all studies.
- ``data`` (``dict``): The dictionary to store input data that used in the experiment.
- ``model_builder`` (``Callable``): The function to build a model with a fixed parameter ``trial``.
- ``train_builder`` (``Callable``): The function to build a training configuration with two fixed parameters ``trial`` and ``model``.
- ``evaluator`` (``eg.ml_metrics.BaseEvaluator``): The DHG evaluator object to evaluate performance of the model in the experiment.
- ``device`` (``torch.device``): The target device to run the experiment.
- ``structure_builder`` (``Optional[Callable]``): The function to build a structure with a fixed parameter ``trial``. The structure can be ``eg.Hypergraph``.
- ``study_name`` (``Optional[str]``): The name of this study. If set to ``None``, the study name will be generated automatically according to current time. Defaults to ``None``.
- ``overwrite`` (``bool``): The flag that whether to overwrite the existing study. Different studies are identified by the ``study_name``. Defaults to ``True``.
- """
-
- def__init__(
- self,
- work_root:Optional[Union[str,Path]],
- data:dict,
- model_builder:Callable,
- train_builder:Callable,
- evaluator:BaseEvaluator,
- device:torch.device,
- structure_builder:Optional[Callable]=None,
- study_name:Optional[str]=None,
- overwrite:bool=True,
- ):
- super().__init__(
- work_root,
- data,
- model_builder,
- train_builder,
- evaluator,
- device,
- structure_builder=structure_builder,
- study_name=study_name,
- overwrite=overwrite,
- )
- self.to(self.device)
-
-
[docs]defto(self,device:torch.device):
-r"""Move the input data to the target device.
-
- Args:
- ``device`` (``torch.device``): The specified target device to store the input data.
- """
- self.device=device
- fornameinself.vars_for_DL:
- ifnameinself.data.keys():
- self.data[name]=self.data[name].to(device)
- returnself
-
- @property
- defvars_for_DL(self):
-r"""Return a name list for available variables for deep learning in the vertex classification task. The name list includes ``features``, ``structure``, ``labels``, ``train_mask``, ``val_mask``, and ``test_mask``.
- """
- return(
- "features",
- "structure",
- "labels",
- "train_mask",
- "val_mask",
- "test_mask",
- )
-
-
[docs]defexperiment(self,trial:optuna.Trial):
-r"""Run the experiment for a given trial.
-
- Args:
- ``trial`` (``optuna.Trial``): The ``optuna.Trial`` object.
- """
- returnsuper().experiment(trial)
-
-
[docs]defrun(self,max_epoch:int,num_trials:int=1,direction:str="maximize"):
-r"""Run experiments with automatically hyper-parameter tuning.
-
- Args:
- ``max_epoch`` (``int``): The maximum number of epochs to train for each experiment.
- ``num_trials`` (``int``): The number of trials to run. Defaults to ``1``.
- ``direction`` (``str``): The direction to optimize. Defaults to ``"maximize"``.
- """
- returnsuper().run(max_epoch,num_trials,direction)
-
-
[docs]deftrain(
- self,
- data:dict,
- model:nn.Module,
- optimizer:torch.optim.Optimizer,
- criterion:nn.Module,
- ):
-r"""Train model for one epoch.
-
- Args:
- ``data`` (``dict``): The input data.
- ``model`` (``nn.Module``): The model.
- ``optimizer`` (``torch.optim.Optimizer``): The model optimizer.
- ``criterion`` (``nn.Module``): The loss function.
- """
- features,structure=data["features"],data["structure"]
- train_mask,labels=data["train_mask"],data["labels"]
- model.train()
- optimizer.zero_grad()
- outputs=model(features,structure)
- loss=criterion(
- outputs[train_mask],
- labels[train_mask],
- )
- loss.backward()
- optimizer.step()
[docs]@torch.no_grad()
- deftest(self,data:Optional[dict]=None,model:Optional[nn.Module]=None):
-r"""Test the model.
-
- Args:
- ``data`` (``dict``, optional): The input data if set to ``None``, the specified ``data`` in the initialization of the experiments will be used. Defaults to ``None``.
- ``model`` (``nn.Module``, optional): The model if set to ``None``, the trained best model will be used. Defaults to ``None``.
- """
- ifdataisNone:
- features,structure=self.data["features"],self.best_structure
- test_mask,labels=self.data["test_mask"],self.data["labels"]
- else:
- features,structure=(
- data["features"].to(self.device),
- data["structure"].to(self.device),
- )
- test_mask,labels=(
- data["test_mask"].to(self.device),
- data["labels"].to(self.device),
- )
- ifmodelisNone:
- model=self.best_model
- model=model.to(self.device)
- model.eval()
- outputs=model(features,structure)
- res=self.evaluator.test(labels[test_mask],outputs[test_mask])
- returnres
Source code for easygraph.functions.basic.avg_degree
-__all__=[
- "average_degree",
-]
-
-
-
[docs]defaverage_degree(G)->float:
-"""Returns the average degree of the graph.
-
- Parameters
- ----------
- G : graph
- A EasyGraph graph
-
- Returns
- -------
- average degree : float
- The average degree of the graph.
-
- Notes
- -----
- Self loops are counted twice in the total degree of a node.
-
- Examples
- --------
- >>> G = eg.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc
- >>> G.add_edge(1, 2)
- >>> G.add_edge(2, 3)
- >>> eg.average_degree(G)
- 1.3333333333333333
- """
- returnG.number_of_edges()/G.number_of_nodes()*2
[docs]defaverage_clustering(G,nodes=None,weight=None,count_zeros=True,n_workers=None):
-r"""Compute the average clustering coefficient for the graph G.
-
- The clustering coefficient for the graph is the average,
-
- .. math::
-
- C = \frac{1}{n}\sum_{v \in G} c_v,
-
- where :math:`n` is the number of nodes in `G`.
-
- Parameters
- ----------
- G : graph
-
- nodes : container of nodes, optional (default=all nodes in G)
- Compute average clustering for nodes in this container.
-
- weight : string or None, optional (default=None)
- The edge attribute that holds the numerical value used as a weight.
- If None, then each edge has weight 1.
-
- count_zeros : bool
- If False include only the nodes with nonzero clustering in the average.
-
- Returns
- -------
- avg : float
- Average clustering
-
- Examples
- --------
- >>> G = eg.complete_graph(5)
- >>> print(eg.average_clustering(G))
- 1.0
-
- Notes
- -----
- This is a space saving routine; it might be faster
- to use the clustering function to get a list and then take the average.
-
- Self loops are ignored.
-
- References
- ----------
- .. [1] Generalizations of the clustering coefficient to weighted
- complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
- K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
- http://jponnela.com/web_documents/a9.pdf
- .. [2] Marcus Kaiser, Mean clustering coefficients: the role of isolated
- nodes and leafs on clustering measures for small-world networks.
- https://arxiv.org/abs/0802.2512
- """
- c=clustering(G,nodes,weight=weight,n_workers=n_workers).values()
- ifnotcount_zeros:
- c=[vforvincifabs(v)>0]
- returnsum(c)/len(c)
[docs]@hybrid("cpp_clustering")
-defclustering(G,nodes=None,weight=None,n_workers=None):
-r"""Compute the clustering coefficient for nodes.
-
- For unweighted graphs, the clustering of a node :math:`u`
- is the fraction of possible triangles through that node that exist,
-
- .. math::
-
- c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)},
-
- where :math:`T(u)` is the number of triangles through node :math:`u` and
- :math:`deg(u)` is the degree of :math:`u`.
-
- For weighted graphs, there are several ways to define clustering [1]_.
- the one used here is defined
- as the geometric average of the subgraph edge weights [2]_,
-
- .. math::
-
- c_u = \frac{1}{deg(u)(deg(u)-1))}
- \sum_{vw} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}.
-
- The edge weights :math:`\hat{w}_{uv}` are normalized by the maximum weight
- in the network :math:`\hat{w}_{uv} = w_{uv}/\max(w)`.
-
- The value of :math:`c_u` is assigned to 0 if :math:`deg(u) < 2`.
-
- Additionally, this weighted definition has been generalized to support negative edge weights [3]_.
-
- For directed graphs, the clustering is similarly defined as the fraction
- of all possible directed triangles or geometric average of the subgraph
- edge weights for unweighted and weighted directed graph respectively [4]_.
-
- .. math::
-
- c_u = \frac{2}{deg^{tot}(u)(deg^{tot}(u)-1) - 2deg^{\leftrightarrow}(u)}
- T(u),
-
- where :math:`T(u)` is the number of directed triangles through node
- :math:`u`, :math:`deg^{tot}(u)` is the sum of in degree and out degree of
- :math:`u` and :math:`deg^{\leftrightarrow}(u)` is the reciprocal degree of
- :math:`u`.
-
-
- Parameters
- ----------
- G : graph
-
- nodes : container of nodes, optional (default=all nodes in G)
- Compute clustering for nodes in this container.
-
- weight : string or None, optional (default=None)
- The edge attribute that holds the numerical value used as a weight.
- If None, then each edge has weight 1.
-
- Returns
- -------
- out : float, or dictionary
- Clustering coefficient at specified nodes
-
- Examples
- --------
- >>> G = eg.complete_graph(5)
- >>> print(eg.clustering(G, 0))
- 1.0
- >>> print(eg.clustering(G))
- {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}
-
- Notes
- -----
- Self loops are ignored.
-
- References
- ----------
- .. [1] Generalizations of the clustering coefficient to weighted
- complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
- K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
- http://jponnela.com/web_documents/a9.pdf
- .. [2] Intensity and coherence of motifs in weighted complex
- networks by J. P. Onnela, J. Saramäki, J. Kertész, and K. Kaski,
- Physical Review E, 71(6), 065103 (2005).
- .. [3] Generalization of Clustering Coefficients to Signed Correlation Networks
- by G. Costantini and M. Perugini, PloS one, 9(2), e88669 (2014).
- .. [4] Clustering in complex directed networks by G. Fagiolo,
- Physical Review E, 76(2), 026107 (2007).
- """
-
- ifG.is_directed():
- ifweightisnotNone:
- td_iter=_directed_weighted_triangles_and_degree_iter(
- G,nodes,weight,n_workers=n_workers
- )
- clusterc={
- v:0ift==0elset/((dt*(dt-1)-2*db)*2)
- forv,dt,db,tintd_iter
- }
- else:
- td_iter=_directed_triangles_and_degree_iter(G,nodes,n_workers=n_workers)
- clusterc={
- v:0ift==0elset/((dt*(dt-1)-2*db)*2)
- forv,dt,db,tintd_iter
- }
- else:
- # The formula 2*T/(d*(d-1)) from docs is t/(d*(d-1)) here b/c t==2*T
- ifweightisnotNone:
- td_iter=_weighted_triangles_and_degree_iter(
- G,nodes,weight,n_workers=n_workers
- )
- clusterc={v:0ift==0elset/(d*(d-1))forv,d,tintd_iter}
- else:
- td_iter=_triangles_and_degree_iter(G,nodes,n_workers=n_workers)
- clusterc={v:0ift==0elset/(d*(d-1))forv,d,t,_intd_iter}
- ifnodesinG:
- # Return the value of the sole entry in the dictionary.
- returnclusterc[nodes]
- returnclusterc
[docs]defpredecessor(G,source,target=None,cutoff=None,return_seen=None):
-"""Returns dict of predecessors for the path from source to all nodes in G.
-
- Parameters
- ----------
- G : EasyGraph graph
-
- source : node label
- Starting node for path
-
- target : node label, optional
- Ending node for path. If provided only predecessors between
- source and target are returned
-
- cutoff : integer, optional
- Depth to stop the search. Only paths of length <= cutoff are returned.
-
- return_seen : bool, optional (default=None)
- Whether to return a dictionary, keyed by node, of the level (number of
- hops) to reach the node (as seen during breadth-first-search).
-
- Returns
- -------
- pred : dictionary
- Dictionary, keyed by node, of predecessors in the shortest path.
-
-
- (pred, seen): tuple of dictionaries
- If `return_seen` argument is set to `True`, then a tuple of dictionaries
- is returned. The first element is the dictionary, keyed by node, of
- predecessors in the shortest path. The second element is the dictionary,
- keyed by node, of the level (number of hops) to reach the node (as seen
- during breadth-first-search).
-
- Examples
- --------
- >>> G = eg.path_graph(4)
- >>> list(G)
- [0, 1, 2, 3]
- >>> eg.predecessor(G, 0)
- {0: [], 1: [0], 2: [1], 3: [2]}
- >>> eg.predecessor(G, 0, return_seen=True)
- ({0: [], 1: [0], 2: [1], 3: [2]}, {0: 0, 1: 1, 2: 2, 3: 3})
-
-
- """
-
- ifsourcenotinG:
- raiseeg.NodeNotFound(f"Source {source} not in G")
- level=0# the current level
- nextlevel=[source]# list of nodes to check at next level
- seen={source:level}# level (number of hops) when seen in BFS
- pred={source:[]}# predecessor dictionary
- whilenextlevel:
- level=level+1
- thislevel=nextlevel
- nextlevel=[]
- forvinthislevel:
- forwinlist(G.neighbors(v)):
- ifwnotinseen:
- pred[w]=[v]
- seen[w]=level
- nextlevel.append(w)
- elifseen[w]==level:# add v to predecessor list if it
- pred[w].append(v)# is at the correct level
- ifcutoffandcutoff<=level:
- break
-
- iftargetisnotNone:
- ifreturn_seen:
- iftargetnotinpred:
- return([],-1)# No predecessor
- return(pred[target],seen[target])
- else:
- iftargetnotinpred:
- return[]# No predecessor
- returnpred[target]
- else:
- ifreturn_seen:
- return(pred,seen)
- else:
- returnpred
[docs]@not_implemented_for("multigraph")
-@hybrid("cpp_betweenness_centrality")
-defbetweenness_centrality(
- G,weight=None,sources=None,normalized=True,endpoints=False,n_workers=None
-):
-r"""Compute the shortest-basic betweenness centrality for nodes.
-
- .. math::
-
- c_B(v) = \sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
-
- where V is the set of nodes,
-
- .. math::
- \sigma(s, t)
-
- is the number of shortest (s, t)-paths, and
-
- .. math::
-
- \sigma(s, t|v)
-
- is the number of those paths passing through some node v other than s, t.
-
- .. math::
-
- If\ s\ =\ t,\ \sigma(s, t) = 1, and\ if\ v \in {s, t}, \sigma(s, t|v) = 0 [2]_.
-
- Parameters
- ----------
- G : graph
- A easygraph graph.
-
- weight : None or string, optional (default=None)
- If None, all edge weights are considered equal.
- Otherwise holds the name of the edge attribute used as weight.
-
- sources : None or nodes list, optional (default=None)
- If None, all nodes are considered.
- Otherwise,the set of source vertices to consider when calculating shortest paths.
-
- normalized : bool, optional
- If True the betweenness values are normalized by `2/((n-1)(n-2))`
- for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
- is the number of nodes in G.
-
- endpoints : bool, optional
- If True include the endpoints in the shortest basic counts.
-
- Returns
- -------
-
- nodes : dictionary
- Dictionary of nodes with betweenness centrality as the value.
-
- >>> betweenness_centrality(G,weight="weight")
- """
-
- importfunctools
-
- ifweightisnotNone:
- path_length=functools.partial(_single_source_dijkstra_path,weight=weight)
- else:
- path_length=functools.partial(_single_source_bfs_path)
-
- ifendpoints:
- accumulate=functools.partial(_accumulate_endpoints)
- else:
- accumulate=functools.partial(_accumulate_basic)
-
- ifsourcesisnotNone:
- nodes=sources
- else:
- nodes=G.nodes
- betweenness=dict.fromkeys(G,0.0)
-
- ifn_workersisnotNone:
- # use the parallel version for large graph
- importrandom
-
- fromfunctoolsimportpartial
- frommultiprocessingimportPool
-
- nodes=list(nodes)
- random.shuffle(nodes)
-
- iflen(nodes)>n_workers*30000:
- nodes=split_len(nodes,step=30000)
- else:
- nodes=split(nodes,n_workers)
- local_function=partial(
- betweenness_centrality_parallel,
- G=G,
- path_length=path_length,
- accumulate=accumulate,
- )
- withPool(n_workers)asp:
- ret=p.imap(local_function,nodes)
- forresinret:
- forkeyinres:
- betweenness[key]+=res[key]
- else:
- # use np-parallel version for small graph
- fornodeinnodes:
- S,P,sigma=path_length(G,source=node)
- betweenness=accumulate(betweenness,S,P,sigma,node)
-
- betweenness=_rescale(
- betweenness,
- len(G),
- normalized=normalized,
- directed=G.is_directed(),
- endpoints=endpoints,
- )
- ret=[0.0foriinrange(len(G))]
- foriinrange(len(ret)):
- ret[i]=betweenness[G.index2node[i]]
- returnret
[docs]@not_implemented_for("multigraph")
-defdegree_centrality(G):
-"""Compute the degree centrality for nodes in a bipartite network.
-
- The degree centrality for a node v is the fraction of nodes it
- is connected to.
-
- parameters
- ----------
- G : graph
- A easygraph graph
-
- Returns
- -------
- nodes : dictionary
- Dictionary of nodes with degree centrality as the value.
-
- Notes
- -----
- The degree centrality are normalized by dividing by n-1 where
- n is number of nodes in G.
- """
- iflen(G)<=1:
- return{n:1forninG}
-
- s=1.0/(len(G)-1.0)
- centrality={n:d*sforn,din(G.degree()).items()}
- returncentrality
-
-
-
[docs]@not_implemented_for("multigraph")
-@only_implemented_for_Directed_graph
-defin_degree_centrality(G):
-"""Compute the in-degree centrality for nodes.
-
- The in-degree centrality for a node v is the fraction of nodes its
- incoming edges are connected to.
-
- Parameters
- ----------
- G : graph
- A EasyGraph graph
-
- Returns
- -------
- nodes : dictionary
- Dictionary of nodes with in-degree centrality as values.
-
- Raises
- ------
- EasyGraphNotImplemented:
- If G is undirected.
-
- See Also
- --------
- degree_centrality, out_degree_centrality
-
- Notes
- -----
- The degree centrality values are normalized by dividing by the maximum
- possible degree in a simple graph n-1 where n is the number of nodes in G.
-
- For multigraphs or graphs with self loops the maximum degree might
- be higher than n-1 and values of degree centrality greater than 1
- are possible.
- """
- iflen(G)<=1:
- return{n:1forninG}
-
- s=1.0/(len(G)-1.0)
- centrality={n:d*sforn,dinG.in_degree()}
- returncentrality
-
-
-
[docs]@not_implemented_for("multigraph")
-@only_implemented_for_Directed_graph
-defout_degree_centrality(G):
-"""Compute the out-degree centrality for nodes.
-
- The out-degree centrality for a node v is the fraction of nodes its
- outgoing edges are connected to.
-
- Parameters
- ----------
- G : graph
- A EasyGraph graph
-
- Returns
- -------
- nodes : dictionary
- Dictionary of nodes with out-degree centrality as values.
-
- Raises
- ------
- EasyGraphNotImplemented:
- If G is undirected.
-
- See Also
- --------
- degree_centrality, in_degree_centrality
-
- Notes
- -----
- The degree centrality values are normalized by dividing by the maximum
- possible degree in a simple graph n-1 where n is the number of nodes in G.
-
- For multigraphs or graphs with self loops the maximum degree might
- be higher than n-1 and values of degree centrality greater than 1
- are possible.
- """
- iflen(G)<=1:
- return{n:1forninG}
-
- s=1.0/(len(G)-1.0)
- centrality={n:d*sforn,dinG.out_degree()}
- returncentrality
[docs]@not_implemented_for("multigraph")
-defego_betweenness(G,node):
-"""
- ego networks are networks consisting of a single actor (ego) together with the actors they are connected to (alters) and all the links among those alters.[1]
- Burt (1992), in his book Structural Holes, provides ample evidence that having high betweenness centrality, which is highly correlated with having many structural holes, can bring benefits to ego.[1]
- Returns the betweenness centrality of a ego network whose ego is set
-
- Parameters
- ----------
- G : graph
- node : int
-
- Returns
- -------
- sum : float
- the betweenness centrality of a ego network whose ego is set
-
- Examples
- --------
- Returns the betwenness centrality of node 1.
-
- >>> ego_betweenness(G,node=1)
-
- Reference
- ---------
- .. [1] Martin Everett, Stephen P. Borgatti. "Ego network betweenness." Social Networks, Volume 27, Issue 1, Pages 31-38, 2005.
-
- """
- g=G.ego_subgraph(node)
- print(g.edges)
- print(g.nodes)
- n=len(g)
-
- A=np.zeros((n,n))
-
- foriinrange(n):
- forjinrange(n):
- ifg.has_edge(g.index2node[i],g.index2node[j]):
- A[i,j]=1
-
- B=A*A
- C=np.identity(n)-A
- sum=0
- flag=G.is_directed()
- foriinrange(n):
- forjinrange(n):
- ifi!=jandC[i,j]==1andB[i,j]!=0:
- sum+=1.0/B[i,j]
- ifflag==False:
- sum/=2
- returnsum
[docs]@not_implemented_for("multigraph")
-@hybrid("cpp_pagerank")
-defpagerank(G,alpha=0.85):
-"""
- Returns the PageRank value of each node in G.
-
- Parameters
- ----------
- G : graph
- Undirected graph will be considered as directed graph with two directed edges for each undirected edge.
-
- alpha : float
- The damping factor. Default is 0.85
-
- """
- importnumpyasnp
-
- iflen(G)==0:
- return{}
- M=google_matrix(G,alpha=alpha)
-
- # use numpy LAPACK solver
- eigenvalues,eigenvectors=np.linalg.eig(M.T)
- ind=np.argmax(eigenvalues)
- # eigenvector of largest eigenvalue is at ind, normalized
- largest=np.array(eigenvectors[:,ind]).flatten().real
- norm=float(largest.sum())
- returndict(zip(G,map(float,largest/norm)))
-
-
-defgoogle_matrix(G,alpha):
- importnumpyasnp
-
- M=eg.to_numpy_array(G)
- N=len(G)
- ifN==0:
- returnM
-
- # Get dangling nodes(nodes with no out link)
- dangling_nodes=np.where(M.sum(axis=1)==0)[0]
- dangling_weights=np.repeat(1.0/N,N)
- fornodeindangling_nodes:
- M[node]=dangling_weights
-
- M/=M.sum(axis=1)[:,np.newaxis]
-
- returnalpha*M+(1-alpha)*np.repeat(1.0/N,N)
-
[docs]@not_implemented_for("multigraph")
-defLPA(G):
-"""Detect community by label propagation algorithm
- Return the detected communities. But the result is random.
- Each node in the network is initially assigned to its own community. At every iteration,nodes have
- a label that the maximum number of their neighbors have. If there are more than one nodes fit and
- available, choose a label randomly. Finally, nodes having the same labels are grouped together as
- communities. In case two or more disconnected groups of nodes have the same label, we run a simple
- breadth-first search to separate the disconnected communities
-
- Parameters
- ----------
- G : graph
- A easygraph graph
-
- Returns
- ----------
- communities : dictionary
- key: serial number of community , value: nodes in the community.
-
- Examples
- ----------
- >>> LPA(G)
-
- References
- ----------
- .. [1] Usha Nandini Raghavan, Réka Albert, and Soundar Kumara:
- Near linear time algorithm to detect community structures in large-scale networks
- """
- i=0
- label_dict=dict()
- cluster_community=dict()
- Next_label_dict=dict()
- nodes=list(G.nodes.keys())
- iflen(nodes)==1:
- return{1:[nodes[0]]}
- fornodeinnodes:
- label_dict[node]=i
- i=i+1
- loop_count=0
- whileTrue:
- loop_count+=1
- random.shuffle(nodes)
- fornodeinnodes:
- labels=SelectLabels(G,node,label_dict)
- iflabels==[]:
- Next_label_dict[node]=label_dict[node]
- continue
- Next_label_dict[node]=random.choice(labels)
- # Asynchronous updates. If you want to use synchronous updates, comment the line below
- label_dict[node]=Next_label_dict[node]
- label_dict=Next_label_dict
- ifestimate_stop_cond(G,label_dict)isTrue:
- break
- fornodeinlabel_dict.keys():
- label=label_dict[node]
- iflabelnotincluster_community.keys():
- cluster_community[label]=[node]
- else:
- cluster_community[label].append(node)
-
- result_community=CheckConnectivity(G,cluster_community)
- returnresult_community
-
-
-
[docs]@not_implemented_for("multigraph")
-defSLPA(G,T,r):
-"""Detect Overlapping Communities by Speaker-listener Label Propagation Algorithm
- Return the detected Overlapping communities. But the result is random.
-
- Parameters
- ----------
- G : graph
- A easygraph graph.
- T : int
- The number of iterations, In general, T is set greater than 20, which produces relatively stable outputs.
- r : int
- a threshold between 0 and 1.
-
- Returns
- -------
- communities : dictionary
- key: serial number of community , value: nodes in the community.
-
- Examples
- ----------
- >>> SLPA(G,
- ... T = 20,
- ... r = 0.05
- ... )
-
- References
- ----------
- .. [1] Jierui Xie, Boleslaw K. Szymanski, Xiaoming Liu:
- SLPA: Uncovering Overlapping Communities in Social Networks via A Speaker-listener Interaction Dynamic Process
- """
- nodes=list(G.nodes.keys())
- iflen(nodes)==1:
- return{1:[nodes[0]]}
- nodes=G.nodes
- adj=G.adj
- memory={i:{i:1}foriinnodes}
- foriinrange(0,T):
- listenerslist=list(G.nodes)
- random.shuffle(listenerslist)
- forlistenerinlistenerslist:
- speakerlist=adj[listener]
- iflen(speakerlist)==0:
- continue
- labels=defaultdict(int)
- forspeakerinspeakerlist:
- # Speaker Rule
- total=float(sum(memory[speaker].values()))
- keys=list(memory[speaker].keys())
- index=np.random.multinomial(
- 1,[round(freq/total,2)forfreqinmemory[speaker].values()]
- ).argmax()
- chosen_label=keys[index]
- labels[chosen_label]+=1
- # Listener Rule
- maxlabel=max(labels.items(),key=lambdax:x[1])[0]
- ifmaxlabelinmemory[listener]:
- memory[listener][maxlabel]+=1
- else:
- memory[listener][maxlabel]=1
-
- fornode,labelsinmemory.items():
- name_list=[]
- forlabel_name,label_numberinlabels.items():
- ifround(label_number/float(T+1),2)<r:
- name_list.append(label_name)
- fornameinname_list:
- dellabels[name]
-
- # Find nodes membership
- communities={}
- fornode,labelsinmemory.items():
- forlabelinlabels:
- iflabelincommunities:
- communities[label].add(node)
- else:
- communities[label]={node}
-
- # Remove nested communities
- RemoveNested(communities)
-
- # Check Connectivity
- result_community=CheckConnectivity(G,communities)
- returnresult_community
-
-
-
[docs]@not_implemented_for("multigraph")
-defHANP(G,m,delta,threshod=1,hier_open=0,combine_open=0):
-"""Detect community by Hop attenuation & node preference algorithm
-
- Return the detected communities. But the result is random.
-
- Implement the basic HANP algorithm and give more freedom through the parameters, e.g., you can use threshod
- to set the condition for node updating. If network are known to be Hierarchical and overlapping communities,
- it's recommended to choose geodesic distance as the measure(instead of receiving the current hop scores
- from the neighborhood and carry out a subtraction) and When an equilibrium is reached, treat newly combined
- communities as a single node.
-
- For using Floyd to get the shortest distance, the time complexity is a little high.
-
- Parameters
- ----------
- G : graph
- A easygraph graph
- m : float
- Used to calculate score, when m > 0, more preference is given to node with more neighbors; m < 0, less
- delta : float
- Hop attenuation
- threshod : float
- Between 0 and 1, only update node whose number of neighbors sharing the maximal label is less than the threshod.
- e.g., threshod == 1 means updating all nodes.
- hier_open :
- 1 means using geodesic distance as the score measure.
- 0 means not.
- combine_open :
- this option is valid only when hier_open = 1
- 1 means When an equilibrium is reached, treat newly combined communities as a single node.
- 0 means not.
-
- Returns
- ----------
- communities : dictionary
- key: serial number of community , value: nodes in the community.
-
- Examples
- ----------
- >>> HANP(G,
- ... m = 0.1,
- ... delta = 0.05,
- ... threshod = 1,
- ... hier_open = 0,
- ... combine_open = 0
- ... )
-
- References
- ----------
- .. [1] Ian X. Y. Leung, Pan Hui, Pietro Liò, and Jon Crowcrof:
- Towards real-time community detection in large networks
-
- """
- nodes=list(G.nodes.keys())
- iflen(nodes)==1:
- return{1:[nodes[0]]}
- label_dict=dict()
- score_dict=dict()
- node_dict=dict()
- Next_label_dict=dict()
- cluster_community=dict()
- nodes=list(G.nodes.keys())
- degrees=G.degree()
- records=[]
- loop_count=0
- i=0
- old_score=1
- ori_G=G
- ifhier_open==1:
- distance_dict=eg.Floyd(G)
- fornodeinnodes:
- label_dict[node]=i
- score_dict[i]=1
- node_dict[i]=node
- i=i+1
- whileTrue:
- loop_count+=1
- random.shuffle(nodes)
- score=1
- fornodeinnodes:
- labels=SelectLabels_HANP(
- G,node,label_dict,score_dict,degrees,m,threshod
- )
- iflabels==[]:
- Next_label_dict[node]=label_dict[node]
- continue
- old_label=label_dict[node]
- Next_label_dict[node]=random.choice(labels)
- # Asynchronous updates. If you want to use synchronous updates, comment the line below
- label_dict[node]=Next_label_dict[node]
- ifhier_open==1:
- score_dict[Next_label_dict[node]]=UpdateScore_Hier(
- G,node,label_dict,node_dict,distance_dict
- )
- score=min(score,score_dict[Next_label_dict[node]])
- else:
- ifold_label==Next_label_dict[node]:
- cdelta=0
- else:
- cdelta=delta
- score_dict[Next_label_dict[node]]=UpdateScore(
- G,node,label_dict,score_dict,cdelta
- )
- ifhier_open==1andcombine_open==1:
- ifold_score-score>1/3:
- old_score=score
- (
- records,
- G,
- label_dict,
- score_dict,
- node_dict,
- Next_label_dict,
- nodes,
- degrees,
- distance_dict,
- )=CombineNodes(
- records,
- G,
- label_dict,
- score_dict,
- node_dict,
- Next_label_dict,
- nodes,
- degrees,
- distance_dict,
- )
- label_dict=Next_label_dict
- if(
- estimate_stop_cond_HANP(G,label_dict,score_dict,degrees,m,threshod)
- isTrue
- ):
- break
-"""As mentioned in the paper, it's suggested that the number of iterations
- required is independent to the number of nodes and that after
- five iterations, 95% of their nodes are already accurately clustered
- """
- ifloop_count>20:
- break
- print("After %d iterations, HANP complete."%loop_count)
- fornodeinlabel_dict.keys():
- label=label_dict[node]
- iflabelnotincluster_community.keys():
- cluster_community[label]=[node]
- else:
- cluster_community[label].append(node)
- ifhier_open==1andcombine_open==1:
- records.append(cluster_community)
- cluster_community=ShowRecord(records)
- result_community=CheckConnectivity(ori_G,cluster_community)
- returnresult_community
-
-
-
[docs]@not_implemented_for("multigraph")
-defBMLPA(G,p):
-"""Detect community by Balanced Multi-Label Propagation algorithm
-
- Return the detected communities.
-
- Firstly, initialize 'old' using cores generated by RC function, the propagate label till the number and size
- of communities stay no change, check if there are subcommunity and delete it. Finally, split discontinuous
- communities.
-
- For some directed graphs lead to oscillations of labels, modify the stop condition.
-
- Parameters
- ----------
- G : graph
- A easygraph graph
- p : float
- Between 0 and 1, judge Whether a community identifier should be retained
-
- Returns
- ----------
- communities : dictionary
- key: serial number of community , value: nodes in the community.
-
- Examples
- ----------
- >>> BMLPA(G,
- ... p = 0.1,
- ... )
-
- References
- ----------
- .. [1] Wu Zhihao, Lin You-Fang, Gregory Steve, Wan Huai-Yu, Tian Sheng-Feng
- Balanced Multi-Label Propagation for Overlapping Community Detection in Social Networks
-
- """
- nodes=list(G.nodes.keys())
- iflen(nodes)==1:
- return{1:[nodes[0]]}
- cores=Rough_Cores(G)
- nodes=G.nodes
- i=0
- old_label_dict=dict()
- new_label_dict=dict()
- forcoreincores:
- fornodeincore:
- ifnodenotinold_label_dict:
- old_label_dict[node]={i:1}
- else:
- old_label_dict[node][i]=1
- i+=1
- oldMin=dict()
- loop_count=0
- old_label_dictx=dict()
- whileTrue:
- loop_count+=1
- old_label_dictx=old_label_dict
- fornodeinnodes:
- Propagate_bbc(G,node,old_label_dict,new_label_dict,p)
- ifloop_count>50andold_label_dict==old_label_dictx:
- break
- Min=dict()
- ifId(old_label_dict)==Id(new_label_dict):
- Min=mc(count(old_label_dict),count(new_label_dict))
- else:
- Min=count(new_label_dict)
- ifloop_count>500:
- break
- ifMin!=oldMin:
- old_label_dict=copy.deepcopy(new_label_dict)
- oldMin=copy.deepcopy(Min)
- else:
- break
- print("After %d iterations, BMLPA complete."%loop_count)
- communities=dict()
- fornodeinnodes:
- forlabel,_inold_label_dict[node].items():
- iflabelincommunities:
- communities[label].add(node)
- else:
- communities[label]={node}
- RemoveNested(communities)
- result_community=CheckConnectivity(G,communities)
- returnresult_community
[docs]defego_graph(G,n,radius=1,center=True,undirected=False,distance=None):
-"""Returns induced subgraph of neighbors centered at node n within
- a given radius.
-
- Parameters
- ----------
- G : graph
- A EasyGraph Graph or DiGraph
-
- n : node
- A single node
-
- radius : number, optional
- Include all neighbors of distance<=radius from n.
-
- center : bool, optional
- If False, do not include center node in graph
-
- undirected : bool, optional
- If True use both in- and out-neighbors of directed graphs.
-
- distance : key, optional
- Use specified edge data key as distance. For example, setting
- distance='weight' will use the edge weight to measure the
- distance from the node n.
-
- Notes
- -----
- For directed graphs D this produces the "out" neighborhood
- or successors. If you want the neighborhood of predecessors
- first reverse the graph with D.reverse(). If you want both
- directions use the keyword argument undirected=True.
-
- Node, edge, and graph attributes are copied to the returned subgraph.
- """
- ifundirected:
-"""
- if distance is not None:
- sp, _ = eg.single_source_dijkstra(
- G.to_undirected(), n, cutoff=radius, weight=distance
- )
- else:
- sp = dict(
- eg.single_source_shortest_path_length(
- G.to_undirected(), n, cutoff=radius
- )
- )
- """
- else:
- ifdistanceisnotNone:
- sp=single_source_dijkstra(G,n,weight=distance)
- else:
- sp=single_source_dijkstra(G,n)
- nodes=[keyforkey,valueinsp.items()ifvalue<=radius]
- nodes=list(nodes)
-
- H=G.nodes_subgraph(nodes)
- ifnotcenter:
- H.remove_node(n)
- returnH
[docs]deflouvain_communities(G,weight="weight",threshold=0.00002):
-r"""Find the best partition of a graph using the Louvain Community Detection
- Algorithm.
-
- Louvain Community Detection Algorithm is a simple method to extract the community
- structure of a network. This is a heuristic method based on modularity optimization. [1]_
-
- The algorithm works in 2 steps. On the first step it assigns every node to be
- in its own community and then for each node it tries to find the maximum positive
- modularity gain by moving each node to all of its neighbor communities. If no positive
- gain is achieved the node remains in its original community.
-
- The modularity gain obtained by moving an isolated node $i$ into a community $C$ can
- easily be calculated by the following formula (combining [1]_ [2]_ and some algebra):
-
- .. math::
- \Delta Q = \frac{k_{i,in}}{2m} - \gamma\frac{ \Sigma_{tot} \cdot k_i}{2m^2}
-
- where $m$ is the size of the graph, $k_{i,in}$ is the sum of the weights of the links
- from $i$ to nodes in $C$, $k_i$ is the sum of the weights of the links incident to node $i$,
- $\Sigma_{tot}$ is the sum of the weights of the links incident to nodes in $C$ and $\gamma$
- is the resolution parameter.
-
- For the directed case the modularity gain can be computed using this formula according to [3]_
-
- .. math::
- \Delta Q = \frac{k_{i,in}}{m}
- - \gamma\frac{k_i^{out} \cdot\Sigma_{tot}^{in} + k_i^{in} \cdot \Sigma_{tot}^{out}}{m^2}
-
- where $k_i^{out}$, $k_i^{in}$ are the outer and inner weighted degrees of node $i$ and
- $\Sigma_{tot}^{in}$, $\Sigma_{tot}^{out}$ are the sum of in-going and out-going links incident
- to nodes in $C$.
-
- The first phase continues until no individual move can improve the modularity.
-
- The second phase consists in building a new network whose nodes are now the communities
- found in the first phase. To do so, the weights of the links between the new nodes are given by
- the sum of the weight of the links between nodes in the corresponding two communities. Once this
- phase is complete it is possible to reapply the first phase creating bigger communities with
- increased modularity.
-
- The above two phases are executed until no modularity gain is achieved (or is less than
- the `threshold`).
-
- Parameters
- ----------
- threshold
- G : easygraph
- weight : string or None, optional (default="weight")
- The name of an edge attribute that holds the numerical value
- used as a weight. If None then each edge has weight 1.
-
- Returns
- -------
- list
- A list of sets (partition of `G`). Each set represents one community and contains
- all the nodes that constitute it.
-
- Notes
- -----
- The order in which the nodes are considered can affect the final output. In the algorithm
- the ordering happens using a random shuffle.
-
- References
- ----------
- .. [1] Blondel, V.D. et al. Fast unfolding of communities in
- large networks. J. Stat. Mech 10008, 1-12(2008). https://doi.org/10.1088/1742-5468/2008/10/P10008
- .. [2] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing
- well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z
- .. [3] Nicolas Dugu��, Anthony Perez. Directed Louvain : maximizing modularity in directed networks.
- [Research Report] Universit�� d��Orl��ans. 2015. hal-01231784. https://hal.archives-ouvertes.fr/hal-01231784
-
- See Also
- --------
- louvain_partitions
- """
- d=louvain_partitions(G,weight,threshold)
- q=deque(d,maxlen=1)
- # q.append(d)
- returnq.pop()
-
-
-
[docs]deflouvain_partitions(G,weight="weight",threshold=0.0000001):
-"""Yields partitions for each level of the Louvain Community Detection Algorithm
-
- Louvain Community Detection Algorithm is a simple method to extract the community
- structure of a network. This is a heuristic method based on modularity optimization. [1]_
-
- The partitions at each level (step of the algorithm) form a dendogram of communities.
- A dendrogram is a diagram representing a tree and each level represents
- a partition of the G graph. The top level contains the smallest communities
- and as you traverse to the bottom of the tree the communities get bigger
- and the overall modularity increases making the partition better.
-
- Each level is generated by executing the two phases of the Louvain Community
- Detection Algorithm.
-
- Parameters
- ----------
- threshold
- G : easygraph
- weight : string or None, optional (default="weight")
- The name of an edge attribute that holds the numerical value
- used as a weight. If None then each edge has weight 1.
-
- Yields
- ------
- list
- A list of sets (partition of `G`). Each set represents one community and contains
- all the nodes that constitute it.
-
- References
- ----------
- .. [1] Blondel, V.D. et al. Fast unfolding of communities in
- large networks. J. Stat. Mech 10008, 1-12(2008)
-
- See Also
- --------
- louvain_communities
- """
- partition=[{u}foruinG.nodes]
- mod=modularity(G,partition)
- is_directed=G.is_directed()
- ifG.is_multigraph():
- G=_convert_multigraph(G,weight,is_directed)
- else:
- graph=G.__class__()
- graph.add_nodes_from(G)
- graph.add_edges_from(G.edges,weight=1)
- G=graph
-
- m=G.size(weight="weight")
- partition,inner_partition,improvement=_one_level(G,m,partition,is_directed)
- improvement=True
- whileimprovement:
- # gh-5901 protect the sets in the yielded list from further manipulation here
-
- yield[s.copy()forsinpartition]
- new_mod=modularity(G,inner_partition,weight="weight")
- ifnew_mod-mod<=threshold:
- return
- mod=new_mod
-"""
- for node1, node2, wt in G.edges:
- print(node1,node2,wt)
- print("\n")
- """
- G=_gen_graph(G,inner_partition)
-"""
- for node1, node2, wt in G.edges:
- print(node1,node2,wt)
- """
- partition,inner_partition,improvement=_one_level(
- G,m,partition,is_directed,1
- )
-
-
-def_one_level(G,m,partition,resolution=1,is_directed=False,seed=None,tes=0):
-"""Calculate one level of the Louvain partitions tree
-
- Parameters
- ----------
- G : EasyGraph Graph/DiGraph
- The graph from which to detect communities
- m : number
- The size of the graph `G`.
- partition : list of sets of nodes
- A valid partition of the graph `G`
- resolution : positive number
- The resolution parameter for computing the modularity of a partition
- is_directed : bool
- True if `G` is a directed graph.
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
-
- """
- node2com={u:ifori,uinenumerate(G.nodes)}
- inner_partition=[{u}foruinG.nodes]
-"""
- if is_directed:
- in_degrees = dict(G.in_degree(weight="weight"))
- out_degrees = dict(G.out_degree(weight="weight"))
- Stot_in = list(in_degrees.values())
- Stot_out = list(out_degrees.values())
- # Calculate weights for both in and out neighbours
- nbrs = {}
- for u in G:
- nbrs[u] = defaultdict(float)
- for _, n, wt in G.out_edges(u, data="weight"):
- nbrs[u][n] += wt
- for n, _, wt in G.in_edges(u, data="weight"):
- nbrs[u][n] += wt
- pass
- else:
- """
- degrees=dict(G.degree(weight="weight"))
- Stot=[]
- foriinG:
- Stot.append(len(G[i]))
-
- # for c in Stot:
- # print(c)
-
- nbrs={u:{v:data["weight"]forv,datainG[u].items()ifv!=u}foruinG}
- rand_nodes=list(G.nodes)
- # seed.shuffle(rand_nodes)
- nb_moves=1
- improvement=False
- whilenb_moves>0:
- # print(nb_moves)
-
- nb_moves=0
- foruinrand_nodes:
- best_mod=0
- best_com=node2com[u]
- weights2com=_neighbor_weights(nbrs[u],node2com)
-"""
- if is_directed:
- in_degree = in_degrees[u]
- out_degree = out_degrees[u]
- Stot_in[best_com] -= in_degree
- Stot_out[best_com] -= out_degree
- remove_cost = (
- -weights2com[best_com] / m
- + (out_degree * Stot_in[best_com] + in_degree * Stot_out[best_com])
- / m**2
- )
- else:
- """
- degree=degrees[u]
- Stot[best_com]-=degree
- remove_cost=-weights2com[best_com]/m+(Stot[best_com]*degree)/(
- 2*m**2
- )
- fornbr_com,wtinweights2com.items():
-"""
- if is_directed:
- gain = (
- remove_cost
- + wt / m
- - (
- out_degree * Stot_in[nbr_com]
- + in_degree * Stot_out[nbr_com]
- )
- / m**2
- )
- else:
- """
- gain=remove_cost+wt/m-(Stot[nbr_com]*degree)/(2*m**2)
- ifgain>best_mod:
- best_mod=gain
- best_com=nbr_com
-"""
- if is_directed:
- Stot_in[best_com] += in_degree
- Stot_out[best_com] += out_degree
- else:
- """
- Stot[best_com]+=degree
-
- ifbest_com!=node2com[u]:
- com=G.nodes[u].get("nodes",{u})
- partition[node2com[u]].difference_update(com)
- inner_partition[node2com[u]].remove(u)
- partition[best_com].update(com)
- inner_partition[best_com].add(u)
- improvement=True
- nb_moves+=1
- node2com[u]=best_com
- partition=list(filter(len,partition))
- inner_partition=list(filter(len,inner_partition))
-
- # for c in partition:
- # print(c)
-
- returnpartition,inner_partition,improvement
-
-
-def_neighbor_weights(nbrs,node2com):
-"""Calculate weights between node and its neighbor communities.
-
- Parameters
- ----------
- nbrs : dictionary
- Dictionary with nodes' neighbours as keys and their edge weight as value.
- node2com : dictionary
- Dictionary with all graph's nodes as keys and their community index as value.
-
- """
- weights=defaultdict(float)
- fornbr,wtinnbrs.items():
- weights[node2com[nbr]]+=wt
- returnweights
-
-
-def_gen_graph(G,partition):
-"""Generate a new graph based on the partitions of a given graph"""
- H=G.__class__()
- node2com={}
- fori,partinenumerate(partition):
- nodes=set()
- fornodeinpart:
- node2com[node]=i
- nodes.update(G.nodes[node].get("nodes",{node}))
- H.add_node(i,nodes=nodes)
-
- fornode1,node2,wtinG.edges:
- com1=node2com[node1]
- com2=node2com[node2]
- wt=wt["weight"]
- try:
- temp=H[com1][com2]["weight"]
- exceptKeyError:
- temp=0
- H.add_edge(com1,com2,weight=wt+temp)
-"""
- if wt:
- wt = wt["weight"]
- H.add_edge(com1, com2, weight=wt)
- else:
- H.add_edge(com1, com2, weight=1)
- """
- returnH
-
-
-def_convert_multigraph(G,weight,is_directed):
-"""Convert a Multigraph to normal Graph"""
- ifis_directed:
- H=eg.DiGraph()
- else:
- H=eg.Graph()
- H.add_nodes_from(G)
- foru,v,wtinG.edges(data=weight,default=1):
- ifH.has_edge(u,v):
- H[u][v]["weight"]+=wt
- else:
- H.add_edge(u,v,weight=wt)
- returnH
-
[docs]@not_implemented_for("multigraph")
-defgreedy_modularity_communities(G,weight="weight"):
-"""Communities detection via greedy modularity method.
-
- Find communities in graph using Clauset-Newman-Moore greedy modularity
- maximization. This method currently supports the Graph class.
-
- Greedy modularity maximization begins with each node in its own community
- and joins the pair of communities that most increases modularity until no
- such pair exists.
-
- Parameters
- ----------
- G : easygraph.Graph or easygraph.DiGraph
-
- weight : string (default : 'weight')
- The key for edge weight. For undirected graph, it will regard each edge
- weight as 1.
-
- Returns
- ----------
- Yields sets of nodes, one for each community.
-
- References
- ----------
- .. [1] Newman, M. E. J. "Networks: An Introduction Oxford Univ." (2010).
- .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore.
- "Finding community structure in very large networks." Physical review E 70.6 (2004): 066111.
- """
-
- # Count nodes and edges
-
- N=len(G.nodes)
- m=sum(d.get(weight,1)foru,v,dinG.edges)
- ifN==0orm==0:
- print("Please input the graph which has at least one edge!")
- exit()
- q0=1.0/(2.0*m)
-
- # Map node labels to contiguous integers
- label_for_node={i:vfori,vinenumerate(G.nodes)}
- node_for_label={label_for_node[i]:iforiinrange(N)}
-
- # Calculate degrees
- k_for_label=G.degree(weight=weight)
- k=[k_for_label[label_for_node[i]]foriinrange(N)]
-
- # Initialize community and merge lists
- communities={i:frozenset([i])foriinrange(N)}
- merges=[]
-
- # Initial modularity
- partition=[[label_for_node[x]forxinc]forcincommunities.values()]
- q_cnm=modularity(G,partition)
-
- # Initialize data structures
- # CNM Eq 8-9 (Eq 8 was missing a factor of 2 (from A_ij + A_ji)
- # a[i]: fraction of edges within community i
- # dq_dict[i][j]: dQ for merging community i, j
- # dq_heap[i][n] : (-dq, i, j) for communitiy i nth largest dQ
- # H[n]: (-dq, i, j) for community with nth largest max_j(dQ_ij)
- a=[k[i]*q0foriinrange(N)]
- dq_dict={
- i:{
- j:2*q0-2*k[i]*k[j]*q0*q0
- forjin[node_for_label[u]foruinG.neighbors(label_for_node[i])]
- ifj!=i
- }
- foriinrange(N)
- }
- dq_heap=[
- MappedQueue([(-dq,i,j)forj,dqindq_dict[i].items()])foriinrange(N)
- ]
- H=MappedQueue([dq_heap[i].h[0]foriinrange(N)iflen(dq_heap[i])>0])
-
- # Merge communities until we can't improve modularity
- whilelen(H)>1:
- # Find best merge
- # Remove from heap of row maxes
- # Ties will be broken by choosing the pair with lowest min community id
- try:
- dq,i,j=H.pop()
- exceptIndexError:
- break
- dq=-dq
- # Remove best merge from row i heap
- dq_heap[i].pop()
- # Push new row max onto H
- iflen(dq_heap[i])>0:
- H.push(dq_heap[i].h[0])
- # If this element was also at the root of row j, we need to remove the
- # duplicate entry from H
- ifdq_heap[j].h[0]==(-dq,j,i):
- H.remove((-dq,j,i))
- # Remove best merge from row j heap
- dq_heap[j].remove((-dq,j,i))
- # Push new row max onto H
- iflen(dq_heap[j])>0:
- H.push(dq_heap[j].h[0])
- else:
- # Duplicate wasn't in H, just remove from row j heap
- dq_heap[j].remove((-dq,j,i))
- # Stop when change is non-positive
- ifdq<=0:
- break
-
- # Perform merge
- communities[j]=frozenset(communities[i]|communities[j])
- delcommunities[i]
- merges.append((i,j,dq))
- # New modularity
- q_cnm+=dq
- # Get list of communities connected to merged communities
- i_set=set(dq_dict[i].keys())
- j_set=set(dq_dict[j].keys())
- all_set=(i_set|j_set)-{i,j}
- both_set=i_set&j_set
- # Merge i into j and update dQ
- forkinall_set:
- # Calculate new dq value
- ifkinboth_set:
- dq_jk=dq_dict[j][k]+dq_dict[i][k]
- elifkinj_set:
- dq_jk=dq_dict[j][k]-2.0*a[i]*a[k]
- else:
- # k in i_set
- dq_jk=dq_dict[i][k]-2.0*a[j]*a[k]
- # Update rows j and k
- forrow,colin[(j,k),(k,j)]:
- # Save old value for finding heap index
- ifkinj_set:
- d_old=(-dq_dict[row][col],row,col)
- else:
- d_old=None
- # Update dict for j,k only (i is removed below)
- dq_dict[row][col]=dq_jk
- # Save old max of per-row heap
- iflen(dq_heap[row])>0:
- d_oldmax=dq_heap[row].h[0]
- else:
- d_oldmax=None
- # Add/update heaps
- d=(-dq_jk,row,col)
- ifd_oldisNone:
- # We're creating a new nonzero element, add to heap
- dq_heap[row].push(d)
- else:
- # Update existing element in per-row heap
- dq_heap[row].update(d_old,d)
- # Update heap of row maxes if necessary
- ifd_oldmaxisNone:
- # No entries previously in this row, push new max
- H.push(d)
- else:
- # We've updated an entry in this row, has the max changed?
- ifdq_heap[row].h[0]!=d_oldmax:
- H.update(d_oldmax,dq_heap[row].h[0])
-
- # Remove row/col i from matrix
- i_neighbors=dq_dict[i].keys()
- forkini_neighbors:
- # Remove from dict
- dq_old=dq_dict[k][i]
- deldq_dict[k][i]
- # Remove from heaps if we haven't already
- ifk!=j:
- # Remove both row and column
- forrow,colin[(k,i),(i,k)]:
- # Check if replaced dq is row max
- d_old=(-dq_old,row,col)
- ifdq_heap[row].h[0]==d_old:
- # Update per-row heap and heap of row maxes
- dq_heap[row].remove(d_old)
- H.remove(d_old)
- # Update row max
- iflen(dq_heap[row])>0:
- H.push(dq_heap[row].h[0])
- else:
- # Only update per-row heap
- dq_heap[row].remove(d_old)
-
- deldq_dict[i]
- # Mark row i as deleted, but keep placeholder
- dq_heap[i]=MappedQueue()
- # Merge i into j and update a
- a[j]+=a[i]
- a[i]=0
-
- communities=[
- frozenset(label_for_node[i]foriinc)forcincommunities.values()
- ]
- returnsorted(communities,key=len,reverse=True)
@not_implemented_for("multigraph")
def enumerate_subgraph(G, k: int):
    """
    Returns the motifs of size *k* found in *G*.

    Motifs are small weakly connected induced subgraphs of a given
    structure in a graph.

    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph.

    k : int
        The size of the motifs to search for.

    Returns
    ----------
    k_subgraphs : list
        The motifs.

    References
    ----------
    .. [1] Wernicke, Sebastian. "Efficient detection of network motifs."
        IEEE/ACM transactions on computational biology and bioinformatics 3.4 (2006): 347-359.

    """
    motifs = []
    for node, _ in G.nodes.items():
        # Only neighbors with a larger label may extend the subgraph,
        # so each motif is enumerated exactly once.
        extension = {nbr for nbr in G.adj[node] if nbr > node}
        extend_subgraph(G, {node}, extension, node, k, motifs)
    return motifs
@not_implemented_for("multigraph")
def random_enumerate_subgraph(G, k: int, cut_prob: list):
    """
    Returns a random sample of the motifs of size *k* in *G*.

    Motifs are small weakly connected induced subgraphs of a given
    structure in a graph. The search tree is pruned randomly, level by
    level, according to ``cut_prob``.

    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph.

    k : int
        The size of the motifs to search for.

    cut_prob : list
        list of probabilities for cutting the search tree at a given level.

    Returns
    ----------
    k_subgraphs : list
        The motifs.

    References
    ----------
    .. [1] Wernicke, Sebastian. "A faster algorithm for detecting network motifs."
        International Workshop on Algorithms in Bioinformatics. Springer, Berlin, Heidelberg, 2005.

    """
    # One pruning probability is required per search-tree level.
    if len(cut_prob) != k:
        raise eg.EasyGraphError("length of cut_prob invalid, should equal to k")

    motifs = []
    for node, _ in G.nodes.items():
        # Randomly skip this root with probability 1 - cut_prob[0].
        if random.random() > cut_prob[0]:
            continue
        extension = {nbr for nbr in G.adj[node] if nbr > node}
        random_extend_subgraph(G, {node}, extension, node, k, motifs, cut_prob)
    return motifs
@not_implemented_for("multigraph", "directed")
def is_biconnected(G):
    """Returns whether the graph is biconnected or not.

    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph

    Returns
    -------
    is_biconnected : boolean
        `True` if the graph is biconnected.

    Examples
    --------

    >>> is_biconnected(G)

    """
    components = list(generator_biconnected_components_nodes(G))
    if len(components) != 1:
        return False
    # A single biconnected component must also cover every node;
    # this guards against isolated vertices.
    return len(components[0]) == len(G)
-
-
-
@not_implemented_for("multigraph", "directed")
# TODO: get the subgraph of each biconnected graph
def biconnected_components(G):
    """Returns a list of biconnected components, each of which denotes the edges set of a biconnected component.

    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph

    Returns
    -------
    biconnected_components : list of list
        Each element list is the edges set of a biconnected component.

    Examples
    --------
    >>> biconnected_components(G)

    """
    # Materialize the generator so callers get a plain list.
    return [component for component in generator_biconnected_components_edges(G)]
-
-
-
@not_implemented_for("multigraph", "directed")
def generator_biconnected_components_nodes(G):
    """Returns a generator of nodes in each biconnected component.

    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph

    Returns
    -------
    Yields nodes set of each biconnected component.

    See Also
    --------
    generator_biconnected_components_edges

    Examples
    --------
    >>> generator_biconnected_components_nodes(G)

    """
    for component_edges in _biconnected_dfs_record_edges(G, need_components=True):
        # TODO: only one edge = biconnected_component?
        # Flatten the component's edge list into its node set.
        nodes = set()
        for u, v in component_edges:
            nodes.add(u)
            nodes.add(v)
        yield nodes
-
-
-
@not_implemented_for("multigraph", "directed")
def generator_biconnected_components_edges(G):
    """Returns a generator of edges in each biconnected component.

    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph

    Returns
    -------
    Yields edges set of each biconnected component.

    See Also
    --------
    generator_biconnected_components_nodes

    Examples
    --------
    >>> generator_biconnected_components_edges(G)

    """
    # Delegate to the DFS helper, which yields one edge list per component.
    yield from _biconnected_dfs_record_edges(G, need_components=True)
-
-
-
@not_implemented_for("multigraph", "directed")
def generator_articulation_points(G):
    """Returns a generator of articulation points.

    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph

    Returns
    -------
    Yields the articulation point in *G*.

    Examples
    --------
    >>> generator_articulation_points(G)

    """
    # The DFS may report the same cut vertex several times; de-duplicate.
    reported = set()
    for cut_vertex in _biconnected_dfs_record_edges(G, need_components=False):
        if cut_vertex in reported:
            continue
        reported.add(cut_vertex)
        yield cut_vertex
-
-
@hybrid("cpp_biconnected_dfs_record_edges")
def _biconnected_dfs_record_edges(G, need_components=True):
    """Iterative DFS for biconnected components / articulation points.

    When ``need_components`` is True, yields one list of edges per
    biconnected component; when False, yields articulation points
    (possibly with repeats — callers de-duplicate).

    References
    ----------
    https://www.cnblogs.com/nullzx/p/7968110.html
    https://blog.csdn.net/gauss_acm/article/details/43493903
    """
    # record edges of each biconnected component in traversal
    # Copied version from EasyGraph
    # depth-first search algorithm to generate articulation points
    # and biconnected components
    visited = set()
    for start in G:
        if start in visited:
            continue
        discovery = {start: 0}  # time of first discovery of node during search
        low = {start: 0}  # low-link values (lowest discovery time reachable)
        root_children = 0  # number of DFS children of the root of this tree
        visited.add(start)
        edge_stack = []  # edges of the component currently being built
        # Each stack frame is (grandparent, parent, iterator over parent's neighbors).
        stack = [(start, start, iter(G[start]))]
        while stack:
            grandparent, parent, children = stack[-1]
            try:
                child = next(children)
                if grandparent == child:
                    # Skip the tree edge we arrived on.
                    continue
                if child in visited:
                    if discovery[child] <= discovery[parent]:  # back edge
                        low[parent] = min(low[parent], discovery[child])
                        if need_components:
                            edge_stack.append((parent, child))
                else:
                    # Tree edge: discover the child and descend.
                    low[child] = discovery[child] = len(discovery)
                    visited.add(child)
                    stack.append((parent, child, iter(G[child])))
                    if need_components:
                        edge_stack.append((parent, child))
            except StopIteration:
                # parent's neighbors exhausted — retreat one level.
                stack.pop()
                if len(stack) > 1:
                    if low[parent] >= discovery[grandparent]:
                        # grandparent is an articulation point; the edges
                        # pushed since (grandparent, parent) form a component.
                        if need_components:
                            ind = edge_stack.index((grandparent, parent))
                            yield edge_stack[ind:]
                            edge_stack = edge_stack[:ind]
                        else:
                            yield grandparent
                    low[grandparent] = min(low[parent], low[grandparent])
                elif stack:  # length 1 so grandparent is root
                    root_children += 1
                    if need_components:
                        ind = edge_stack.index((grandparent, parent))
                        yield edge_stack[ind:]
        if not need_components:
            # root node is articulation point if it has more than 1 child
            if root_children > 1:
                yield start
-
-
def _biconnected_dfs_record_nodes(G, need_components=True):
    """Iterative DFS recording *nodes* of each biconnected component.

    Mirror of ``_biconnected_dfs_record_edges`` but keeps a node stack
    instead of an edge stack.
    """
    # record nodes of each biconnected component in traversal
    # Not used.
    visited = set()
    for start in G:
        if start in visited:
            continue
        discovery = {start: 0}  # time of first discovery of node during search
        low = {start: 0}  # low-link values
        root_children = 0
        visited.add(start)
        node_stack = [start]  # nodes of the component currently being built
        stack = [(start, start, iter(G[start]))]
        while stack:
            grandparent, parent, children = stack[-1]
            try:
                child = next(children)
                if grandparent == child:
                    continue
                if child in visited:
                    if discovery[child] <= discovery[parent]:  # back edge
                        low[parent] = min(low[parent], discovery[child])
                else:
                    low[child] = discovery[child] = len(discovery)
                    visited.add(child)
                    stack.append((parent, child, iter(G[child])))
                    if need_components:
                        node_stack.append(child)
            except StopIteration:
                stack.pop()
                if len(stack) > 1:
                    if low[parent] >= discovery[grandparent]:
                        if need_components:
                            # The nodes pushed since grandparent form a component;
                            # grandparent itself stays on the stack (shared cut vertex).
                            ind = node_stack.index(grandparent)
                            yield node_stack[ind:]
                            node_stack = node_stack[: ind + 1]
                        else:
                            yield grandparent
                    low[grandparent] = min(low[parent], low[grandparent])
                elif stack:  # length 1 so grandparent is root
                    root_children += 1
                    if need_components:
                        ind = node_stack.index(grandparent)
                        yield node_stack[ind:]
        if not need_components:
            # root node is articulation point if it has more than 1 child
            if root_children > 1:
                yield start
-
@not_implemented_for("multigraph")
def is_connected(G):
    """Returns whether the graph is connected or not.

    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph

    Returns
    -------
    is_connected : boolean
        `True` if the graph is connected.

    Examples
    --------

    >>> is_connected(G)

    """
    # NOTE(review): `assert` is stripped under `python -O`; raising an
    # exception would be safer for input validation.
    assert len(G) != 0, "No node in the graph."
    arbitrary_node = next(iter(G))  # Pick an arbitrary node to run BFS
    # Connected iff BFS from any node visits every node.
    return len(G) == sum(1 for node in _plain_bfs(G, arbitrary_node))
-
-
-
@not_implemented_for("multigraph")
def number_connected_components(G):
    """Returns the number of connected components.

    Parameters
    ----------
    G : easygraph.Graph

    Returns
    -------
    number_connected_components : int
        The number of connected components.

    Examples
    --------
    >>> number_connected_components(G)

    """
    # Count components lazily without materializing their node sets.
    return sum(1 for _ in _generator_connected_components(G))
-
-
@not_implemented_for("multigraph")
@hybrid("cpp_connected_components_undirected")
def connected_components(G):
    """Yields the node set of every connected component of *G*.

    Parameters
    ----------
    G : easygraph.Graph

    Yields
    ------
    set
        The nodes of one connected component.

    Examples
    --------
    >>> connected_components(G)

    """
    explored = set()
    for node in G:
        if node in explored:
            continue
        component = set(_plain_bfs(G, node))
        explored |= component
        yield component
-
-
@not_implemented_for("multigraph")
@hybrid("cpp_connected_components_directed")
def connected_components_directed(G):
    """Yields the node set of every (weakly) connected component of *G*.

    Parameters
    ----------
    G : easygraph.DiGraph

    Yields
    ------
    set
        The nodes of one connected component.

    Examples
    --------
    >>> connected_components_directed(G)

    """
    explored = set()
    for node in G:
        if node in explored:
            continue
        component = set(_plain_bfs(G, node))
        explored |= component
        yield component
-
-
def _generator_connected_components(G):
    """Yield the node set of each connected component exactly once."""
    explored = set()
    for node in G:
        if node in explored:
            continue
        component = set(_plain_bfs(G, node))
        yield component
        explored |= component
-
-
-
@not_implemented_for("multigraph")
def connected_component_of_node(G, node):
    """Returns the connected component that *node* belongs to.

    Parameters
    ----------
    G : easygraph.Graph

    node : object
        The target node

    Returns
    -------
    connected_component_of_node : set
        The connected component that *node* belongs to.

    Examples
    --------
    Returns the connected component of one node `Jack`.

    >>> connected_component_of_node(G, node='Jack')

    """
    # BFS from the node collects exactly its component.
    return {reached for reached in _plain_bfs(G, node)}
Source code for easygraph.functions.components.strongly_connected
-importeasygraphaseg
-
-fromeasygraph.utils.decoratorsimport*
-
-
-__all__=[
- "number_strongly_connected_components",
- "strongly_connected_components",
- "is_strongly_connected",
- "condensation",
-]
-
-
@not_implemented_for("undirected")
@hybrid("cpp_strongly_connected_components")
def strongly_connected_components(G):
    """Generate nodes in strongly connected components of graph.

    Parameters
    ----------
    G : EasyGraph Graph
        A directed graph.

    Returns
    -------
    comp : generator of sets
        A generator of sets of nodes, one for each strongly connected
        component of G.

    Raises
    ------
    EasyGraphNotImplemented
        If G is undirected.

    Examples
    --------
    Generate a sorted list of strongly connected components, largest first.

    If you only want the largest component, it's more efficient to
    use max instead of sort.

    >>> largest = max(eg.strongly_connected_components(G), key=len)

    See Also
    --------
    connected_components

    Notes
    -----
    Uses Tarjan's algorithm[1]_ with Nuutila's modifications[2]_.
    Nonrecursive version of algorithm.

    References
    ----------
    .. [1] Depth-first search and linear graph algorithms, R. Tarjan
        SIAM Journal of Computing 1(2):146-160, (1972).

    .. [2] On finding the strongly connected components in a directed graph.
        E. Nuutila and E. Soisalon-Soinen
        Information Processing Letters 49(1): 9-14, (1994)..

    """
    preorder = {}  # node -> DFS preorder number
    lowlink = {}  # node -> lowest preorder reachable (Tarjan low-link)
    scc_found = set()  # nodes already assigned to a yielded SCC
    scc_queue = []  # candidate SCC roots awaiting confirmation
    i = 0  # Preorder counter
    # One persistent neighbor iterator per node lets the DFS resume
    # where it left off after backtracking.
    neighbors = {v: iter(G[v]) for v in G}
    for source in G:
        if source not in scc_found:
            queue = [source]  # explicit DFS stack (nonrecursive)
            while queue:
                v = queue[-1]
                if v not in preorder:
                    i = i + 1
                    preorder[v] = i
                done = True
                for w in neighbors[v]:
                    if w not in preorder:
                        # Descend into the first undiscovered neighbor.
                        queue.append(w)
                        done = False
                        break
                if done:
                    # All of v's neighbors processed: compute low-link.
                    lowlink[v] = preorder[v]
                    for w in G[v]:
                        if w not in scc_found:
                            if preorder[w] > preorder[v]:
                                lowlink[v] = min([lowlink[v], lowlink[w]])
                            else:
                                lowlink[v] = min([lowlink[v], preorder[w]])
                    queue.pop()
                    if lowlink[v] == preorder[v]:
                        # v is the root of an SCC; pop its members.
                        scc = {v}
                        while scc_queue and preorder[scc_queue[-1]] > preorder[v]:
                            k = scc_queue.pop()
                            scc.add(k)
                        scc_found.update(scc)
                        yield scc
                    else:
                        scc_queue.append(v)
-
-
-
def number_strongly_connected_components(G):
    """Returns number of strongly connected components in graph.

    Parameters
    ----------
    G : Easygraph graph
        A directed graph.

    Returns
    -------
    n : integer
        Number of strongly connected components

    Raises
    ------
    EasygraphNotImplemented
        If G is undirected.

    Examples
    --------
    >>> G = eg.DiGraph([(0, 1), (1, 2), (2, 0), (2, 3), (4, 5), (3, 4), (5, 6), (6, 3), (6, 7)])
    >>> eg.number_strongly_connected_components(G)
    3

    See Also
    --------
    strongly_connected_components
    number_connected_components

    Notes
    -----
    For directed graphs only.
    """
    # Count SCCs lazily without storing them.
    return sum(1 for _ in strongly_connected_components(G))
-
-
-
@not_implemented_for("undirected")
def is_strongly_connected(G):
    """Test directed graph for strong connectivity.

    A directed graph is strongly connected if and only if every vertex in
    the graph is reachable from every other vertex.

    Parameters
    ----------
    G : EasyGraph Graph
        A directed graph.

    Returns
    -------
    connected : bool
        True if the graph is strongly connected, False otherwise.

    Examples
    --------
    >>> G = eg.DiGraph([(0, 1), (1, 2), (2, 3), (3, 0), (2, 4), (4, 2)])
    >>> eg.is_strongly_connected(G)
    True
    >>> G.remove_edge(2, 3)
    >>> eg.is_strongly_connected(G)
    False

    Raises
    ------
    EasyGraphNotImplemented
        If G is undirected.

    See Also
    --------
    is_connected
    is_biconnected
    strongly_connected_components

    Notes
    -----
    For directed graphs only.
    """
    if len(G) == 0:
        raise eg.EasyGraphPointlessConcept(
            "Connectivity is undefined for the null graph."
        )
    # Strongly connected iff the first SCC already covers every node.
    first_scc = next(strongly_connected_components(G))
    return len(first_scc) == len(G)
-
-
-
@not_implemented_for("multigraph")
@only_implemented_for_Directed_graph
def condensation(G, scc=None):
    """Returns the condensation of G.
    The condensation of G is the graph with each of the strongly connected
    components contracted into a single node.
    Parameters
    ----------
    G : easygraph.DiGraph
        A directed graph.
    scc: list or generator (optional, default=None)
        Strongly connected components. If provided, the elements in
        `scc` must partition the nodes in `G`. If not provided, it will be
        calculated as scc=strongly_connected_components(G).
    Returns
    -------
    C : easygraph.DiGraph
        The condensation graph C of G. The node labels are integers
        corresponding to the index of the component in the list of
        strongly connected components of G. C has a graph attribute named
        'mapping' with a dictionary mapping the original nodes to the
        nodes in C to which they belong. Each node in C also has a node
        attribute 'members' with the set of original nodes in G that
        form the SCC that the node in C represents.
    Examples
    --------
    # >>> condensation(G)
    Notes
    -----
    After contracting all strongly connected components to a single node,
    the resulting graph is a directed acyclic graph.
    """
    if scc is None:
        scc = strongly_connected_components(G)
    mapping = {}  # original node -> component index
    incoming_info = {}  # target node -> set of source nodes of cross-SCC edges
    members = {}  # component index -> set of original nodes
    C = eg.DiGraph()
    # Add mapping dict as graph attribute
    C.graph["mapping"] = mapping
    if len(G) == 0:
        return C
    for i, component in enumerate(scc):
        members[i] = component
        mapping.update((n, i) for n in component)
    number_of_components = i + 1
    for i in range(number_of_components):
        # NOTE(review): the attribute key is 'member' (singular) although the
        # docstring above says 'members' — confirm which one callers rely on.
        C.add_node(i, member=members[i], incoming=set())
    C.add_nodes(range(number_of_components))
    for edge in G.edges:
        if mapping[edge[0]] != mapping[edge[1]]:
            # Edge crosses two components: keep it in the condensation.
            C.add_edge(mapping[edge[0]], mapping[edge[1]])
            if edge[1] not in incoming_info.keys():
                incoming_info[edge[1]] = set()
            incoming_info[edge[1]].add(edge[0])
    C.graph["incoming_info"] = incoming_info
    return C
@not_implemented_for("undirected")
def weakly_connected_components(G):
    """Generate weakly connected components of G.

    Parameters
    ----------
    G : EasyGraph graph
        A directed graph

    Returns
    -------
    comp : generator of sets
        A generator of sets of nodes, one for each weakly connected
        component of G.

    Raises
    ------
    EasyGraphNotImplemented
        If G is undirected.

    Examples
    --------
    Generate a sorted list of weakly connected components, largest first.

    >>> G = eg.path_graph(4, create_using=eg.DiGraph())
    >>> eg.add_path(G, [10, 11, 12])
    >>> [
    ...     len(c)
    ...     for c in sorted(eg.weakly_connected_components(G), key=len, reverse=True)
    ... ]
    [4, 3]

    If you only want the largest component, it's more efficient to
    use max instead of sort:

    >>> largest_cc = max(eg.weakly_connected_components(G), key=len)

    See Also
    --------
    connected_components
    strongly_connected_components

    Notes
    -----
    For directed graphs only.

    """
    explored = set()
    for node in G:
        if node in explored:
            continue
        # _plain_bfs ignores edge direction, giving the weak component.
        component = set(_plain_bfs(G, node))
        explored |= component
        yield component
-
-
-
@not_implemented_for("undirected")
def number_weakly_connected_components(G):
    """Returns the number of weakly connected components in G.

    Parameters
    ----------
    G : EasyGraph graph
        A directed graph.

    Returns
    -------
    n : integer
        Number of weakly connected components

    Raises
    ------
    EasyGraphNotImplemented
        If G is undirected.

    Examples
    --------
    >>> G = eg.DiGraph([(0, 1), (2, 1), (3, 4)])
    >>> eg.number_weakly_connected_components(G)
    2

    See Also
    --------
    weakly_connected_components
    number_connected_components
    number_strongly_connected_components

    Notes
    -----
    For directed graphs only.

    """
    # Count components lazily without storing their node sets.
    return sum(1 for _ in weakly_connected_components(G))
-
-
-
@not_implemented_for("undirected")
def is_weakly_connected(G):
    """Test directed graph for weak connectivity.

    A directed graph is weakly connected if and only if the graph
    is connected when the direction of the edge between nodes is ignored.

    Note that if a graph is strongly connected (i.e. the graph is connected
    even when we account for directionality), it is by definition weakly
    connected as well.

    Parameters
    ----------
    G : EasyGraph Graph
        A directed graph.

    Returns
    -------
    connected : bool
        True if the graph is weakly connected, False otherwise.

    Raises
    ------
    EasyGraphNotImplemented
        If G is undirected.

    Examples
    --------
    >>> G = eg.DiGraph([(0, 1), (2, 1)])
    >>> G.add_node(3)
    >>> eg.is_weakly_connected(G)  # node 3 is not connected to the graph
    False
    >>> G.add_edge(2, 3)
    >>> eg.is_weakly_connected(G)
    True

    See Also
    --------
    is_strongly_connected
    is_semiconnected
    is_connected
    is_biconnected
    weakly_connected_components

    Notes
    -----
    For directed graphs only.

    """
    if len(G) == 0:
        raise eg.EasyGraphPointlessConcept(
            "Connectivity is undefined for the null graph."
        )
    # Weakly connected iff the first weak component covers every node.
    first_component = next(weakly_connected_components(G))
    return len(first_component) == len(G)
-
-
def _plain_bfs(G, source):
    """A fast BFS node generator.

    The direction of the edge between nodes is ignored.

    For directed graphs only.

    """
    successors = G.adj
    predecessors = G.pred

    seen = set()
    frontier = {source}
    while frontier:
        # Swap in a fresh frontier for the next level before expanding.
        current, frontier = frontier, set()
        for node in current:
            if node in seen:
                continue
            seen.add(node)
            # Follow edges in both directions to ignore orientation.
            frontier.update(successors[node])
            frontier.update(predecessors[node])
            yield node
-
@hybrid("cpp_k_core")
def k_core(G: "Graph") -> Union["Graph", List]:
    """
    Compute the core number of every node in *G*.

    The core number of a node is the largest ``k`` such that the node is
    part of the k-core (the maximal subgraph whose nodes all have degree
    at least ``k`` within it). Uses the linear-time bin-sort algorithm of
    Batagelj and Zaversnik.

    Parameters
    ----------
    G : EasyGraph graph
        A graph or directed graph

    Returns
    -------
    ret : list
        ``ret[i]`` is the core number of the node whose index is ``i``
        (via ``G.index2node``).

    Notes
    -----
    NOTE(review): an earlier docstring described ``k`` and ``return_graph``
    parameters and a subgraph return value; the implementation accepts
    neither and always returns the list of core numbers.
    """
    # Create a shallow copy of the input graph
    # NOTE(review): H is never used below — presumably a leftover.
    H = G.copy()

    # Initialize a dictionary to store the degrees of the nodes
    degrees = dict(G.degree())
    # Sort nodes by degree.
    nodes = sorted(degrees, key=degrees.get)
    # bin_boundaries[d] is the first position in `nodes` of a node of degree d.
    bin_boundaries = [0]
    curr_degree = 0
    for i, v in enumerate(nodes):
        if degrees[v] > curr_degree:
            bin_boundaries.extend([i] * (degrees[v] - curr_degree))
            curr_degree = degrees[v]
    node_pos = {v: pos for pos, v in enumerate(nodes)}
    # The initial guess for the core number of a node is its degree.
    core = degrees
    nbrs = {v: list(G.neighbors(v)) for v in G}
    for v in nodes:
        for u in nbrs[v]:
            if core[u] > core[v]:
                nbrs[u].remove(v)
                # Move u to the front of its bin and shrink its core by 1,
                # keeping `nodes` sorted by current core value.
                pos = node_pos[u]
                bin_start = bin_boundaries[core[u]]
                node_pos[u] = bin_start
                node_pos[nodes[bin_start]] = pos
                nodes[bin_start], nodes[pos] = nodes[pos], nodes[bin_start]
                bin_boundaries[core[u]] += 1
                core[u] -= 1
    # Re-index the result by node index.
    ret = [0.0 for i in range(len(G))]
    for i in range(len(ret)):
        ret[i] = core[G.index2node[i]]
    return ret
-importeasygraphaseg
-
-
-__all__=[
- "plot_Followers",
- "plot_Connected_Communities",
- "plot_Betweenness_Centrality",
- "plot_Neighborhood_Followers",
-]
-
-
-# Number of Followers
-
def plot_Followers(G, SHS):
    """
    Returns the CDF curves of "Number of Followers" of SH spanners and ordinary users in graph G.

    Parameters
    ----------
    G : graph
        A easygraph graph.

    SHS : list
        The SH Spanners in graph G.

    Returns
    -------
    plt : CDF curves
        the CDF curves of "Number of Followers" of SH spanners and ordinary users in graph G.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import statsmodels.api as sm

    # Every node that is not a structural-hole spanner is an ordinary user.
    ordinary = [node for node in G if node not in SHS]
    degree = G.degree()
    sample1 = [degree[node] for node in degree.keys() if node in ordinary]
    sample2 = [degree[node] for node in degree.keys() if node in SHS]
    X1 = np.linspace(min(sample1), max(sample1))
    Y1 = sm.distributions.ECDF(sample1)(X1)
    X2 = np.linspace(min(sample2), max(sample2))
    Y2 = sm.distributions.ECDF(sample2)(X2)
    plt.plot(X1, Y1, "b--", label="Ordinary User")
    plt.plot(X2, Y2, "r", label="SH Spanner")
    plt.title("Number of Followers")
    plt.xlabel("Number of Followers")
    plt.ylabel("Cumulative Distribution Function")
    plt.legend(loc="lower right")
    plt.show()
-
-
-# Number of Connected Communities
-
def plot_Connected_Communities(G, SHS):
    """
    Returns the CDF curves of "Number of Connected Communities" of SH spanners and ordinary users in graph G.

    Parameters
    ----------
    G : graph
        A easygraph graph.

    SHS : list
        The SH Spanners in graph G.

    Returns
    -------
    plt : CDF curves
        the CDF curves of "Number of Connected Communities" of SH spanners and ordinary users in graph G.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import statsmodels.api as sm

    # Every node that is not a structural-hole spanner is an ordinary user.
    OU = [node for node in G if node not in SHS]
    cmts = eg.LPA(G)

    def _connected_communities(node):
        # Number of distinct communities containing at least one neighbor.
        touched = set()
        for nbr in G.neighbors(node=node):
            for label in cmts:
                if nbr in cmts[label]:
                    touched.add(label)
        return len(touched)

    sample1 = [_connected_communities(node) for node in OU]
    sample2 = [_connected_communities(node) for node in SHS]
    # (debug prints of the raw samples removed)
    X1 = np.linspace(min(sample1), max(sample1))
    Y1 = sm.distributions.ECDF(sample1)(X1)
    X2 = np.linspace(min(sample2), max(sample2))
    Y2 = sm.distributions.ECDF(sample2)(X2)
    plt.plot(X1, Y1, "b--", label="Ordinary User")
    plt.plot(X2, Y2, "r", label="SH Spanner")
    plt.title("Number of Connected Communities")
    plt.xlabel("Number of Connected Communities")
    plt.ylabel("Cumulative Distribution Function")
    plt.legend(loc="lower right")
    plt.show()
-
-
-# Betweenness Centrality
-
def plot_Betweenness_Centrality(G, SHS):
    """
    Returns the CDF curves of "Betweenness Centralitys" of SH spanners and ordinary users in graph G.

    Parameters
    ----------
    G : graph
        A easygraph graph.

    SHS : list
        The SH Spanners in graph G.

    Returns
    -------
    plt : CDF curves
        the CDF curves of "Betweenness Centrality" of SH spanners and ordinary users in graph G.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import statsmodels.api as sm

    # Every node that is not a structural-hole spanner is an ordinary user.
    ordinary = [node for node in G if node not in SHS]
    bc = eg.betweenness_centrality(G)
    sample1 = [bc[node] for node in bc.keys() if node in ordinary]
    sample2 = [bc[node] for node in bc.keys() if node in SHS]
    X1 = np.linspace(min(sample1), max(sample1))
    Y1 = sm.distributions.ECDF(sample1)(X1)
    X2 = np.linspace(min(sample2), max(sample2))
    Y2 = sm.distributions.ECDF(sample2)(X2)
    plt.plot(X1, Y1, "b--", label="Ordinary User")
    plt.plot(X2, Y2, "r", label="SH Spanner")
    plt.title("Betweenness Centrality")
    plt.xlabel("Betweenness Centrality")
    plt.ylabel("Cumulative Distribution Function")
    plt.legend(loc="lower right")
    plt.show()
-
-
-# Arg. Number of Followers of the Neighborhood Users
-
def plot_Neighborhood_Followers(G, SHS):
    """
    Returns the CDF curves of "Arg. Number of Followers of the Neighborhood Users" of SH spanners and ordinary users in graph G.

    Parameters
    ----------
    G : graph
        A easygraph graph.

    SHS : list
        The SH Spanners in graph G.

    Returns
    -------
    plt : CDF curves
        the CDF curves of "Arg. Number of Followers of the Neighborhood Users
        " of SH spanners and ordinary users in graph G.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import statsmodels.api as sm

    # Every node that is not a structural-hole spanner is an ordinary user.
    ordinary = [node for node in G if node not in SHS]
    degree = G.degree()

    sample1 = []
    for node in ordinary:
        # Average follower count over the node's neighborhood.
        nbr_degrees = [degree[nbr] for nbr in G.neighbors(node=node)]
        sample1.append(sum(nbr_degrees) / len(nbr_degrees))
    sample2 = []
    for node in SHS:
        nbr_degrees = [degree[nbr] for nbr in G.neighbors(node=node)]
        sample2.append(sum(nbr_degrees) / len(nbr_degrees))

    X1 = np.linspace(min(sample1), max(sample1))
    Y1 = sm.distributions.ECDF(sample1)(X1)
    X2 = np.linspace(min(sample2), max(sample2))
    Y2 = sm.distributions.ECDF(sample2)(X2)
    plt.plot(X1, Y1, "b--", label="Ordinary User")
    plt.plot(X2, Y2, "r", label="SH Spanner")
    plt.title("Arg. Number of Followers of the Neighborhood Users")
    plt.xlabel("Arg. Number of Followers of the Neighborhood Users")
    plt.ylabel("Cumulative Distribution Function")
    plt.legend(loc="lower right")
    plt.show()
def random_position(G, center=None, dim=2, random_seed=None):
    """
    Returns random position for each node in graph G.

    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph

    center : array-like or None, optional (default : None)
        Coordinate pair around which to center the layout

    dim : int, optional (default : 2)
        Dimension of layout

    random_seed : int or None, optional (default : None)
        Seed for RandomState instance

    Returns
    ----------
    pos : dict
        A dictionary of positions keyed by node
    """
    import numpy as np

    center = _get_center(center, dim)

    # Seeded generator makes layouts reproducible.
    rng = np.random.RandomState(seed=random_seed)
    coords = (rng.rand(len(G), dim) + center).astype(np.float32)
    return dict(zip(G, coords))
-
-
-
def circular_position(G, center=None, scale=1):
    """
    Position nodes on a circle, the dimension is 2.

    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph
        A position will be assigned to every node in G

    center : array-like or None, optional (default : None)
        Coordinate pair around which to center the layout

    scale : number, optional (default : 1)
        Scale factor for positions

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node
    """
    import numpy as np

    center = _get_center(center, dim=2)

    if len(G) == 0:
        return {}
    if len(G) == 1:
        # A single node sits exactly at the center.
        return {G.nodes[0]: center}

    # Evenly spaced angles around the unit circle (endpoint excluded
    # because 2*pi coincides with 0).
    angles = (np.linspace(0, 1, len(G), endpoint=False) * 2 * np.pi).astype(np.float32)
    coords = np.column_stack([np.cos(angles), np.sin(angles)])
    coords = rescale_position(coords, scale=scale) + center
    return dict(zip(G, coords))
-
-
-
def shell_position(G, nlist=None, scale=1, center=None):
    """
    Position nodes in concentric circles, the dimension is 2.

    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph

    nlist : list of lists or None, optional (default : None)
        List of node lists for each shell.

    scale : number, optional (default : 1)
        Scale factor for positions.

    center : array-like or None, optional (default : None)
        Coordinate pair around which to center the layout.


    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Notes
    -----
    This algorithm currently only works in two dimensions and does not
    try to minimize edge crossings.

    """
    import numpy as np

    center = _get_center(center, dim=2)

    if len(G) == 0:
        return {}
    if len(G) == 1:
        return {G.nodes[0]: center}

    if nlist is None:
        # draw the whole graph in one shell
        nlist = [list(G)]

    # A lone node in the innermost shell sits at the center (radius 0);
    # otherwise the innermost ring starts at radius 1.
    radius = 0.0 if len(nlist[0]) == 1 else 1.0

    npos = {}
    for shell in nlist:
        # Discard the extra angle since it matches 0 radians.
        angles = (np.linspace(0, 1, len(shell), endpoint=False) * 2 * np.pi).astype(
            np.float32
        )
        ring = np.column_stack([np.cos(angles), np.sin(angles)])
        if len(ring) > 1:
            ring = rescale_position(ring, scale=scale * radius / len(nlist)) + center
        else:
            ring = np.array([(scale * radius + center[0], center[1])])
        npos.update(zip(shell, ring))
        radius += 1.0

    return npos
-
-
def _get_center(center, dim):
    """Validate *center* against *dim*, defaulting to the origin."""
    import numpy as np

    center = np.zeros(dim) if center is None else np.asarray(center)

    if dim < 2:
        raise ValueError("cannot handle dimensions < 2")

    if len(center) != dim:
        raise ValueError("length of center coordinates must match dimension of layout")

    return center
-
-
-
def rescale_position(pos, scale=1):
    """
    Returns scaled position array to (-scale, scale) in all axes.

    The array is modified in place and also returned.

    Parameters
    ----------
    pos : numpy array
        positions to be scaled. Each row is a position.

    scale : number, optional (default : 1)
        The size of the resulting extent in all directions.

    Returns
    -------
    pos : numpy array
        scaled positions. Each row is a position.
    """
    # Center every axis at zero and find the largest absolute coordinate.
    limit = 0  # max coordinate for all axes
    for axis in range(pos.shape[1]):
        column = pos[:, axis]
        column -= column.mean()
        limit = max(abs(column).max(), limit)
    # Uniform rescale to (-scale, scale) preserves the aspect ratio.
    if limit > 0:
        pos *= scale / limit
    return pos
-
-
-
def kamada_kawai_layout(
    G, dist=None, pos=None, weight="weight", scale=1, center=None, dim=2
):
    """Position nodes using Kamada-Kawai basic-length cost-function.

    Parameters
    ----------
    G : graph or list of nodes
        A position will be assigned to every node in G.

    dist : dict (default=None)
        A two-level dictionary of optimal distances between nodes,
        indexed by source and destination node.
        If None, the distance is computed using shortest_path_length().

    pos : dict or None optional (default=None)
        Initial positions for nodes as a dictionary with node as keys
        and values as a coordinate list or tuple. If None, then use
        circular_layout() for dim >= 2 and a linear layout for dim == 1.

    weight : string or None optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight. If None, then all edge weights are 1.

    scale : number (default: 1)
        Scale factor for positions.

    center : array-like or None
        Coordinate pair around which to center the layout.

    dim : int
        Dimension of layout.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Examples
    --------
    >>> pos = eg.kamada_kawai_layout(G)
    """
    import numpy as np

    nNodes = len(G)
    if nNodes == 0:
        return {}

    if dist is None:
        # All-pairs shortest paths as the target inter-node distances.
        dist = dict(eg.Floyd(G))
    # Unreachable pairs keep a large sentinel distance (1e6).
    dist_mtx = 1e6 * np.ones((nNodes, nNodes))
    for row, nr in enumerate(G):
        if nr not in dist:
            continue
        rdist = dist[nr]
        for col, nc in enumerate(G):
            if nc not in rdist:
                continue
            dist_mtx[row][col] = rdist[nc]

    if pos is None:
        # Seed the optimizer with a simple layout matching the dimension.
        if dim >= 3:
            pos = eg.random_position(G, dim=dim)
        elif dim == 2:
            pos = eg.circular_position(G)
        else:
            pos = {n: pt for n, pt in zip(G, np.linspace(0, 1, len(G)))}

    pos_arr = np.array([pos[n] for n in G])

    pos = _kamada_kawai_solve(dist_mtx, pos_arr, dim)

    if center is None:
        center = np.zeros(dim)
    else:
        center = np.asarray(center)

    if len(center) != dim:
        msg = "length of center coordinates must match dimension of layout"
        raise ValueError(msg)

    pos = eg.rescale_position(pos, scale=scale) + center
    return dict(zip(G, pos))
-
-
def _kamada_kawai_solve(dist_mtx, pos_arr, dim):
    """Optimize node locations under the Kamada-Kawai cost-function.

    Starts from ``pos_arr`` and minimizes the cost against the supplied
    matrix of preferred inter-node distances using L-BFGS-B.
    """
    import numpy as np

    from scipy.optimize import minimize

    mean_weight = 1e-3
    # Pre-invert the distances; the small diagonal offset avoids a
    # division by zero on the (meaningless) self-distances.
    inv_dist = 1 / (dist_mtx + np.eye(dist_mtx.shape[0]) * 1e-3)

    opt_result = minimize(
        _kamada_kawai_costfn,
        pos_arr.ravel(),
        method="L-BFGS-B",
        args=(np, inv_dist, mean_weight, dim),
        jac=True,  # the cost function returns (cost, gradient)
    )

    return opt_result.x.reshape((-1, dim))
-
-
-def_kamada_kawai_costfn(pos_vec,np,invdist,meanweight,dim):
- # Cost-function and gradient for Kamada-Kawai layout algorithm
- nNodes=invdist.shape[0]
- pos_arr=pos_vec.reshape((nNodes,dim))
-
- delta=pos_arr[:,np.newaxis,:]-pos_arr[np.newaxis,:,:]
- nodesep=np.linalg.norm(delta,axis=-1)
- direction=np.einsum("ijk,ij->ijk",delta,1/(nodesep+np.eye(nNodes)*1e-3))
-
- offset=nodesep*invdist-1.0
- offset[np.diag_indices(nNodes)]=0
-
- cost=0.5*np.sum(offset**2)
- grad=np.einsum("ij,ij,ijk->ik",invdist,offset,direction)-np.einsum(
- "ij,ij,ijk->jk",invdist,offset,direction
- )
-
- # Additional parabolic term to encourage mean position to be near origin:
- sumpos=np.sum(pos_arr,axis=0)
- cost+=0.5*meanweight*np.sum(sumpos**2)
- grad+=meanweight*sumpos
-
- return(cost,grad.ravel())
-
@not_implemented_for("multigraph")
def node2vec(
    G,
    dimensions=128,
    walk_length=80,
    num_walks=10,
    p=1.0,
    q=1.0,
    weight_key=None,
    workers=None,
    **skip_gram_params,
):
    """Graph embedding via Node2Vec.

    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph

    dimensions : int
        Embedding dimensions, optional(default: 128)

    walk_length : int
        Number of nodes in each walk, optional(default: 80)

    num_walks : int
        Number of walks per node, optional(default: 10)

    p : float
        The return hyper parameter, optional(default: 1.0)

    q : float
        The input parameter, optional(default: 1.0)

    weight_key : string or None (default: None)
        On weighted graphs, this is the key for the weight attribute

    workers : int or None, optional(default : None)
        The number of workers generating random walks (default: None). None if not using only one worker.

    skip_gram_params : dict
        Parameters for gensim.models.Word2Vec - do not supply 'size', it is taken from the 'dimensions' parameter

    Returns
    -------
    embedding_vector : dict
        The embedding vector of each node

    most_similar_nodes_of_node : dict
        The most similar nodes of each node and its similarity

    Examples
    --------

    >>> node2vec(G,
    ...          dimensions=128, # The graph embedding dimensions.
    ...          walk_length=80, # Walk length of each random walks.
    ...          num_walks=10, # Number of random walks.
    ...          p=1.0, # The `p` possibility in random walk in [1]_
    ...          q=1.0, # The `q` possibility in random walk in [1]_
    ...          weight_key='weight',
    ...          skip_gram_params=dict( # The skip_gram parameters in Python package gensim.
    ...          window=10,
    ...          min_count=1,
    ...          batch_words=4
    ...          ))

    References
    ----------
    .. [1] https://arxiv.org/abs/1607.00653

    """
    # Relabel nodes to consecutive integer indices; the walks and the
    # skip-gram model operate on indices, and the maps translate back.
    G_index, index_of_node, node_of_index = G.to_index_node_graph()

    if workers is None:
        # Single-process walk generation.
        walks = simulate_walks(
            G_index,
            walk_length=walk_length,
            num_walks=num_walks,
            p=p,
            q=q,
            weight_key=weight_key,
        )
    else:
        from joblib import Parallel
        from joblib import delayed

        # Split the requested number of walks across the workers; each
        # worker generates its own chunk of walks independently.
        num_walks_lists = np.array_split(range(num_walks), workers)
        # NOTE(review): the loop variable below shadows the `num_walks`
        # parameter; `len(num_walks)` is the size of each chunk — verify
        # this is the intended per-worker walk count.
        walks = Parallel(n_jobs=workers)(
            delayed(simulate_walks)(
                G_index, walk_length, len(num_walks), p, q, weight_key
            )
            for num_walks in num_walks_lists
        )
        # Change multidimensional array to one dimensional array
        walks = [walk for walk_group in walks for walk in walk_group]

    # Train the skip-gram model on the generated walks.
    model = learn_embeddings(walks=walks, dimensions=dimensions, **skip_gram_params)

    # Translate index-keyed embeddings back to the original node labels.
    (
        embedding_vector,
        most_similar_nodes_of_node,
    ) = _get_embedding_result_from_gensim_skipgram_model(
        G=G, index_of_node=index_of_node, node_of_index=node_of_index, model=model
    )

    del G_index
    return embedding_vector, most_similar_nodes_of_node
def erdos_renyi_M(n, edge, directed=False, FilePath=None):
    """Given the number of nodes and the number of edges, return an Erdős-Rényi random graph, and store the graph in a document.

    Parameters
    ----------
    n : int
        The number of nodes.
    edge : int
        The number of edges.
    directed : bool, optional (default=False)
        If True, this function returns a directed graph.
    FilePath : string
        The file for storing the output graph G.

    Returns
    -------
    G : graph
        an Erdős-Rényi random graph.

    Examples
    --------
    Returns an Erdős-Rényi random graph G.

    >>> erdos_renyi_M(100,180,directed=False,FilePath="/users/fudanmsn/downloads/RandomNetwork.txt")

    References
    ----------
    .. [1] P. Erdős and A. Rényi, On Random Graphs, Publ. Math. 6, 290 (1959).
    .. [2] E. N. Gilbert, Random Graphs, Ann. Math. Stat., 30, 1141 (1959).
    """
    if directed:
        G = eg.DiGraph()
        adjacent = {}  # adjacency lists mirrored into the output file
        mmax = n * (n - 1)  # maximum possible number of directed edges
        if edge >= mmax:
            # Requested at least the complete graph: add every ordered pair.
            # NOTE(review): this branch returns before the file write at the
            # bottom, so FilePath is ignored here — confirm intended.
            for i in range(n):
                for j in range(n):
                    if i != j:
                        G.add_edge(i, j)
                        if i not in adjacent:
                            adjacent[i] = []
                            adjacent[i].append(j)
                        else:
                            adjacent[i].append(j)
            return G
        count = 0
        # Rejection sampling: draw random ordered pairs until `edge`
        # distinct non-loop edges have been added.
        while count < edge:
            i = random.randint(0, n - 1)
            j = random.randint(0, n - 1)
            if i == j or G.has_edge(i, j):
                continue
            else:
                count = count + 1
                if i not in adjacent:
                    adjacent[i] = []
                    adjacent[i].append(j)
                else:
                    adjacent[i].append(j)
                G.add_edge(i, j)
    else:
        G = eg.Graph()
        adjacent = {}
        mmax = n * (n - 1) / 2  # maximum possible number of undirected edges
        if edge >= mmax:
            # Complete undirected graph; adjacency is recorded both ways.
            # NOTE(review): this branch also skips the file write below.
            for i in range(n):
                for j in range(n):
                    if i != j:
                        G.add_edge(i, j)
                        if i not in adjacent:
                            adjacent[i] = []
                            adjacent[i].append(j)
                        else:
                            adjacent[i].append(j)
                        if j not in adjacent:
                            adjacent[j] = []
                            adjacent[j].append(i)
                        else:
                            adjacent[j].append(i)
            return G
        count = 0
        while count < edge:
            i = random.randint(0, n - 1)
            j = random.randint(0, n - 1)
            if i == j or G.has_edge(i, j):
                continue
            else:
                count = count + 1
                if i not in adjacent:
                    adjacent[i] = []
                    adjacent[i].append(j)
                else:
                    adjacent[i].append(j)
                if j not in adjacent:
                    adjacent[j] = []
                    adjacent[j].append(i)
                else:
                    adjacent[j].append(i)
                G.add_edge(i, j)

    writeRandomNetworkToFile(n, adjacent, FilePath)
    return G
-
-
-
def erdos_renyi_P(n, p, directed=False, FilePath=None):
    """Given the number of nodes and the probability of edge creation, return an Erdős-Rényi random graph, and store the graph in a document.

    Parameters
    ----------
    n : int
        The number of nodes.
    p : float
        Probability for edge creation.
    directed : bool, optional (default=False)
        If True, this function returns a directed graph.
    FilePath : string
        The file for storing the output graph G.

    Returns
    -------
    G : graph
        an Erdős-Rényi random graph.

    Examples
    --------
    Returns an Erdős-Rényi random graph G

    >>> erdos_renyi_P(100,0.5,directed=False,FilePath="/users/fudanmsn/downloads/RandomNetwork.txt")

    References
    ----------
    .. [1] P. Erdős and A. Rényi, On Random Graphs, Publ. Math. 6, 290 (1959).
    .. [2] E. N. Gilbert, Random Graphs, Ann. Math. Stat., 30, 1141 (1959).
    """
    if directed:
        G = eg.DiGraph()
        adjacent = {}  # adjacency lists mirrored into the output file
        probability = 0.0
        # NOTE(review): only ordered pairs with i < j are sampled here, so
        # edges j -> i are never generated for the directed case — confirm
        # whether both directions should be considered independently.
        for i in range(n):
            for j in range(i + 1, n):
                probability = random.random()
                if probability < p:
                    if i not in adjacent:
                        adjacent[i] = []
                        adjacent[i].append(j)
                    else:
                        adjacent[i].append(j)
                    G.add_edge(i, j)
    else:
        G = eg.Graph()
        adjacent = {}
        probability = 0.0
        # Each unordered pair is kept independently with probability p.
        for i in range(n):
            for j in range(i + 1, n):
                probability = random.random()
                if probability < p:
                    if i not in adjacent:
                        adjacent[i] = []
                        adjacent[i].append(j)
                    else:
                        adjacent[i].append(j)
                    if j not in adjacent:
                        adjacent[j] = []
                        adjacent[j].append(i)
                    else:
                        adjacent[j].append(i)
                    G.add_edge(i, j)

    writeRandomNetworkToFile(n, adjacent, FilePath)
    return G
-
-
-
def fast_erdos_renyi_P(n, p, directed=False, FilePath=None):
    """Given the number of nodes and the probability of edge creation, return an Erdős-Rényi random graph, and store the graph in a document. Use this function for generating a huge scale graph.

    Runs in time proportional to (n + m) rather than n^2 by sampling the
    geometric gaps between successive successful pair trials instead of
    testing every pair.

    Parameters
    ----------
    n : int
        The number of nodes.
    p : float
        Probability for edge creation.
    directed : bool, optional (default=False)
        If True, this function returns a directed graph.
    FilePath : string
        The file for storing the output graph G.

    Returns
    -------
    G : graph
        an Erdős-Rényi random graph.

    Examples
    --------
    Returns an Erdős-Rényi random graph G

    >>> erdos_renyi_P(100,0.5,directed=False,FilePath="/users/fudanmsn/downloads/RandomNetwork.txt")

    References
    ----------
    .. [1] P. Erdős and A. Rényi, On Random Graphs, Publ. Math. 6, 290 (1959).
    .. [2] E. N. Gilbert, Random Graphs, Ann. Math. Stat., 30, 1141 (1959).
    """
    if directed:
        G = eg.DiGraph()
        w = -1
        lp = math.log(1.0 - p)
        v = 0
        adjacent = {}
        # Walk the ordered-pair grid row by row; each geometric skip lands
        # on the next pair that succeeds its Bernoulli(p) trial.
        while v < n:
            lr = math.log(1.0 - random.random())
            w = w + 1 + int(lr / lp)  # geometric gap to the next success
            if v == w:  # avoid self loops
                w = w + 1
            while v < n <= w:
                # Skip overflows the row: wrap to the next source vertex.
                w = w - n
                v = v + 1
                if v == w:  # avoid self loops
                    w = w + 1
            if v < n:
                G.add_edge(v, w)
                if v not in adjacent:
                    adjacent[v] = []
                    adjacent[v].append(w)
                else:
                    adjacent[v].append(w)
    else:
        G = eg.Graph()
        w = -1
        lp = math.log(1.0 - p)
        v = 1
        adjacent = {}
        # Undirected variant walks only the pairs with w < v.
        while v < n:
            lr = math.log(1.0 - random.random())
            w = w + 1 + int(lr / lp)
            while w >= v and v < n:
                w = w - v
                v = v + 1
            if v < n:
                G.add_edge(v, w)
                if v not in adjacent:
                    adjacent[v] = []
                    adjacent[v].append(w)
                else:
                    adjacent[v].append(w)
                if w not in adjacent:
                    adjacent[w] = []
                    adjacent[w].append(v)
                else:
                    adjacent[w].append(v)

    writeRandomNetworkToFile(n, adjacent, FilePath)
    return G
-
-
-
def WS_Random(n, k, p, FilePath=None):
    """Returns a small-world graph.

    Builds a Watts-Strogatz graph: a ring lattice where each node is
    connected to its k nearest neighbors, followed by random rewiring of
    each lattice edge with probability p.

    Parameters
    ----------
    n : int
        The number of nodes
    k : int
        Each node is joined with its `k` nearest neighbors in a ring
        topology.
    p : float
        The probability of rewiring each edge
    FilePath : string
        The file for storing the output graph G

    Returns
    -------
    G : graph
        a small-world graph

    Examples
    --------
    Returns a small-world graph G

    >>> WS_Random(100,10,0.3,"/users/fudanmsn/downloads/RandomNetwork.txt")

    """
    if k >= n:
        # Cannot join every node to k neighbors on a ring of n nodes.
        # NOTE(review): returns None (and prints) instead of raising.
        print("k>=n, choose smaller k or larger n")
        return
    adjacent = {}  # adjacency lists mirrored into the output file
    G = eg.Graph()
    NUM1 = n
    NUM2 = NUM1 - 1  # largest valid node id, used for rewiring targets
    K = k
    K1 = K + 1
    N = list(range(NUM1))
    G.add_nodes(N)

    # Phase 1: ring lattice — connect each node i to its next K neighbors
    # clockwise, wrapping around the ring near the end.
    for i in range(NUM1):
        for j in range(1, K1):
            K_add = NUM1 - K
            i_add_j = i + j + 1
            if i >= K_add and i_add_j > NUM1:
                i_add = i + j - NUM1  # wrap past the last node
                G.add_edge(i, i_add)
            else:
                i_add = i + j
                G.add_edge(i, i_add)
            if i not in adjacent:
                adjacent[i] = []
                adjacent[i].append(i_add)
            else:
                adjacent[i].append(i_add)
            if i_add not in adjacent:
                adjacent[i_add] = []
                adjacent[i_add].append(i)
            else:
                adjacent[i_add].append(i)
    # Phase 2: rewiring — each lattice edge (i, e_del) is replaced, with
    # probability p, by an edge from i to a uniformly random node that is
    # neither i itself nor already a neighbor of i.
    for i in range(NUM1):
        for e_del in range(i + 1, i + K1):
            if e_del >= NUM1:
                e_del = e_del - NUM1  # wrap the neighbor index
            P_random = random.random()
            if P_random < p:
                G.remove_edge(i, e_del)
                adjacent[i].remove(e_del)
                if adjacent[i] == []:
                    adjacent.pop(i)
                adjacent[e_del].remove(i)
                if adjacent[e_del] == []:
                    adjacent.pop(e_del)
                # Resample until the new endpoint is valid.
                e_add = random.randint(0, NUM2)
                while e_add == i or G.has_edge(i, e_add) == True:
                    e_add = random.randint(0, NUM2)
                G.add_edge(i, e_add)
                if i not in adjacent:
                    adjacent[i] = []
                    adjacent[i].append(e_add)
                else:
                    adjacent[i].append(e_add)
                if e_add not in adjacent:
                    adjacent[e_add] = []
                    adjacent[e_add].append(i)
                else:
                    adjacent[e_add].append(i)
    writeRandomNetworkToFile(n, adjacent, FilePath)
    return G
@nodes_or_number(0)
def complete_graph(n, create_using=None):
    """Return the complete graph `K_n` with n nodes.

    A complete graph on `n` nodes means that all pairs
    of distinct nodes have an edge connecting them.

    Parameters
    ----------
    n : int or iterable container of nodes
        If n is an integer, nodes are from range(n).
        If n is a container of nodes, those nodes appear in the graph.
    create_using : EasyGraph graph constructor, optional (default=eg.Graph)
        Graph type to create. If graph instance, then cleared before populated.

    Examples
    --------
    >>> G = eg.complete_graph(9)
    >>> len(G)
    9
    >>> G.size()
    36
    >>> G = eg.complete_graph(range(11, 14))
    >>> list(G.nodes())
    [11, 12, 13]
    >>> G = eg.complete_graph(4, eg.DiGraph())
    >>> G.is_directed()
    True

    """
    # The decorator normalizes `n` into (name, node-container).
    n_name, nodes = n
    G = empty_graph(n_name, create_using)
    if len(nodes) > 1:
        # Directed graphs need both orientations of every pair.
        pair_iter = (
            itertools.permutations(nodes, 2)
            if G.is_directed()
            else itertools.combinations(nodes, 2)
        )
        G.add_edges_from(pair_iter)
    return G
Source code for easygraph.functions.hypergraph.assortativity
-"""Algorithms for finding the degree assortativity of a hypergraph."""
-
-importrandom
-
-fromitertoolsimportcombinations
-
-importnumpy
-importnumpyasnp
-
-fromeasygraph.utils.exceptionimportEasyGraphError
-
-
-__all__=["dynamical_assortativity","degree_assortativity"]
-
-
-
def dynamical_assortativity(H):
    """Computes the dynamical assortativity of a uniform hypergraph.

    Parameters
    ----------
    H : eg.Hypergraph
        Hypergraph of interest

    Returns
    -------
    float
        The dynamical assortativity

    See Also
    --------
    degree_assortativity

    Raises
    ------
    EasyGraphError
        If the hypergraph is not uniform, or if there are no nodes
        or no edges

    References
    ----------
    Nicholas Landry and Juan G. Restrepo,
    Hypergraph assortativity: A dynamical systems perspective,
    Chaos 2022.
    DOI: 10.1063/5.0086905

    """
    # Guard clauses: the quantity is only defined for non-empty, uniform
    # hypergraphs without singleton edges.
    if len(H.v) == 0:
        raise EasyGraphError("Hypergraph must contain nodes")
    elif len(H.e[0]) == 0:
        raise EasyGraphError("Hypergraph must contain edges!")

    if not H.is_uniform():
        raise EasyGraphError("Hypergraph must be uniform!")

    if 1 in H.unique_edge_sizes():
        raise EasyGraphError("No singleton edges!")

    degs = H.deg_v
    mean_deg = sum(degs) / len(degs)  # <k>
    mean_sq_deg = np.mean(np.array(degs) ** 2)  # <k^2>
    # Mean product of degrees over all node pairs within every hyperedge.
    pair_products = [
        degs[n1] * degs[n2] for e in H.e[0] for n1, n2 in combinations(e, 2)
    ]
    mean_pair = np.mean(pair_products)

    return mean_pair * mean_deg**2 / mean_sq_deg**2 - 1
-
-
-
def degree_assortativity(H, kind="uniform", exact=False, num_samples=1000):
    """Computes the degree assortativity of a hypergraph

    Parameters
    ----------
    H : Hypergraph
        The hypergraph of interest
    kind : str, optional
        the type of degree assortativity. valid choices are
        "uniform", "top-2", and "top-bottom". By default, "uniform".
    exact : bool, optional
        whether to compute over all edges or sample randomly from the
        set of edges. By default, False.
    num_samples : int, optional
        if not exact, specify the number of samples for the computation.
        By default, 1000.

    Returns
    -------
    float
        the degree assortativity

    Raises
    ------
    EasyGraphError
        If there are no nodes or no edges

    See Also
    --------
    dynamical_assortativity

    References
    ----------
    Phil Chodrow,
    Configuration models of random hypergraphs,
    Journal of Complex Networks 2020.
    DOI: 10.1093/comnet/cnaa018
    """

    if len(H.v) == 0:
        raise EasyGraphError("Hypergraph must contain nodes")
    elif len(H.e[0]) == 0:
        raise EasyGraphError("Hypergraph must contain edges!")

    degs = H.deg_v
    if exact:
        # Every edge with at least two members contributes one degree pair.
        k1k2 = [_choose_degrees(e, degs, kind) for e in H.e[0] if len(e) > 1]
    else:
        # BUGFIX: sample from the filtered list, not the raw edge list —
        # singleton edges cannot yield a degree pair and would make
        # _choose_degrees raise.
        edges = [e for e in H.e[0] if len(e) > 1]
        k1k2 = [
            _choose_degrees(random.choice(edges), degs, kind)
            for _ in range(num_samples)
        ]

    # Pearson correlation between the two sampled degree sequences.
    rho = np.corrcoef(np.array(k1k2).T)[0, 1]
    if np.isnan(rho):
        return 0
    return rho
-
-
-def_choose_degrees(e,k,kind="uniform"):
-"""Choose the degrees of two nodes in a hyperedge.
-
- Parameters
- ----------
- e : iterable
- the members in a hyperedge
- k : dict
- the degrees where keys are node IDs and values are degrees
- kind : str, optional
- the type of degree assortativity, options are "uniform", "top-2",
- and "top-bottom". By default, "uniform".
-
- Returns
- -------
- tuple
- two degrees selected from the edge
-
- Raises
- ------
- EasyGraphError
- if invalid assortativity function chosen
-
- See Also
- --------
- degree_assortativity
-
- References
- ----------
- Phil Chodrow,
- Configuration models of random hypergraphs,
- Journal of Complex Networks 2020.
- DOI: 10.1093/comnet/cnaa018
- """
- e=list(e)
- iflen(e)>1:
- ifkind=="uniform":
- i=np.random.randint(len(e))
- j=i
- whilei==j:
- j=np.random.randint(len(e))
- return(k[e[i]],k[e[j]])
-
- elifkind=="top-2":
- degs=sorted([k[i]foriine])[-2:]
- random.shuffle(degs)
- returndegs
-
- elifkind=="top-bottom":
- # this selects the largest and smallest degrees in one line
- degs=sorted([k[i]foriine])[::len(e)-1]
- random.shuffle(degs)
- returndegs
-
- else:
- raiseEasyGraphError("Invalid choice function!")
- else:
- raiseEasyGraphError("Edge must have more than one member!")
-
def s_betweenness(H, s=1, weight=False, n_workers=None):
    """Computes the betweenness centrality for each edge in the hypergraph.

    The centrality is evaluated on the s-linegraph of ``H``, where two
    hyperedges are adjacent when they share at least ``s`` vertices.

    Parameters
    ----------
    H : eg.Hypergraph.
        The hypergraph to compute

    s : int, optional.

    Returns
    ----------
    dict
        The keys are the edges and the values are the betweenness centrality.
        The betweenness centrality for each edge in the hypergraph.


    """
    line_graph = H.get_linegraph(s=s, weight=weight)
    return eg.betweenness_centrality(line_graph, n_workers=n_workers)
-
-
-
def s_closeness(H, s=1, weight=False, n_workers=None):
    """
    Compute the closeness centrality for each edge in the hypergraph.

    Evaluated on the s-linegraph of ``H``.

    Parameters
    ----------
    H : eg.Hypergraph.
    s : int, optional

    Returns
    -------
    dict. The closeness centrality for each edge in the hypergraph. The keys are the edges and the values are the closeness centrality.
    """
    line_graph = H.get_linegraph(s=s, weight=weight)
    return eg.closeness_centrality(line_graph, n_workers=n_workers)
-
-
-
def s_eccentricity(H, s=1, edges=True, source=None):
    r"""
    The length of the longest shortest path from a vertex $u$ to every other vertex in
    the s-linegraph.
    $V$ = set of vertices in the s-linegraph
    $d$ = shortest path distance

    .. math::

        \text{s-ecc}(u) = \text{max}\{d(u,v): v \in V\}

    Parameters
    ----------
    H : eg.Hypergraph

    s : int, optional

    edges : bool, optional
        Indicates if method should compute edge linegraph (default) or node linegraph.

    source : str, optional
        Identifier of node or edge of interest for computing centrality

    Returns
    -------
    dict or float
        returns the s-eccentricity value of the edges(nodes).
        If source=None a dictionary of values for each s-edge in H is returned.
        If source then a single value is returned.
        If the s-linegraph is disconnected, np.inf is returned.

    """

    g = H.get_linegraph(s=s, edges=edges)
    result = eg.eccentricity(g)
    # BUGFIX: test against None explicitly — a falsy identifier such as
    # node/edge id 0 is a valid source and must return a single value,
    # not the whole dictionary.
    if source is not None:
        return result[source]
    else:
        return result
def vector_centrality(H):
    """The vector centrality of nodes in the line graph of the hypergraph.

    Parameters
    ----------
    H : eg.Hypergraph


    Returns
    -------
    dict
        Centrality, where keys are node IDs and values are lists of centralities.

    References
    ----------
    "Vector centrality in hypergraphs", K. Kovalenko, M. Romance, E. Vasilyeva,
    D. Aleja, R. Criado, D. Musatov, A.M. Raigorodskii, J. Flores, I. Samoylenko,
    K. Alfaro-Bittner, M. Perc, S. Boccaletti,
    https://doi.org/10.1016/j.chaos.2022.112397

    """

    # If the hypergraph is empty, then return an empty dictionary
    if H.num_v == 0:
        return dict()

    LG = H.get_linegraph()
    if not eg.is_connected(LG):
        raise EasyGraphError("This method is not defined for disconnected hypergraphs.")
    # Eigenvector centrality of each hyperedge in the line graph.
    LGcent = eg.eigenvector_centrality(LG)

    # One centrality list per node; entry k-2 corresponds to edge size k.
    vc = {node: [] for node in range(0, H.num_v)}

    # Map each hyperedge (as a tuple) to its index in the line graph.
    edge_label_dict = {tuple(edge): index for index, edge in enumerate(H.e[0])}

    hyperedge_dims = {tuple(edge): len(edge) for edge in H.e[0]}

    D = max([len(e) for e in H.e[0]])  # largest hyperedge size

    # For each edge size k, accumulate each member node's share of the
    # line-graph centrality of the size-k edges containing it.
    for k in range(2, D + 1):
        c_i = np.zeros(H.num_v)

        for edge, _ in list(filter(lambda x: x[1] == k, hyperedge_dims.items())):
            for node in edge:
                try:
                    # assumes node IDs index directly into c_i, i.e. nodes
                    # are labeled 0..num_v-1 — the except below enforces it
                    c_i[node] += LGcent[edge_label_dict[edge]]
                except IndexError:
                    raise Exception(
                        "Nodes must be written with the Pythonic indexing (0,1,2...)"
                    )

        c_i *= 1 / k  # normalize by edge size

        for node in range(H.num_v):
            vc[node].append(c_i[node])

    return vc
def hypergraph_clustering_coefficient(H):
    r"""Return the clustering coefficients for
    each node in a Hypergraph.

    This clustering coefficient is defined as the
    clustering coefficient of the unweighted pairwise
    projection of the hypergraph, i.e.,
    :math:`c = A^3_{i,i}/\binom{k}{2},`
    where :math:`A` is the adjacency matrix of the network
    and :math:`k` is the pairwise degree of :math:`i`.

    Parameters
    ----------
    H : Hypergraph
        Hypergraph

    Returns
    -------
    dict
        nodes are keys, clustering coefficients are values.

    Notes
    -----
    The clustering coefficient is undefined when the number of
    neighbors is 0 or 1, but we set the clustering coefficient
    to 0 in these cases. For more discussion, see
    https://arxiv.org/abs/0802.2512

    See Also
    --------
    local_clustering_coefficient
    two_node_clustering_coefficient

    References
    ----------
    "Clustering Coefficients in Protein Interaction Hypernetworks"
    by Suzanne Gallagher and Debra Goldberg.
    DOI: 10.1145/2506583.2506635

    Example
    -------
    >>> import easygraph as eg
    >>> H = eg.random_hypergraph(3, [1, 1])
    >>> cc = eg.clustering_coefficient(H)
    >>> cc
    {0: 1.0, 1: 1.0, 2: 1.0}
    """
    adj = H.adjacency_matrix()
    # Pairwise degree of each node: row sums of the adjacency matrix.
    # Sparse .sum(axis=1) returns an (n, 1) matrix; flatten it with ravel
    # instead of the previous element-by-element Python loop.
    k = np.asarray(adj.sum(axis=1)).ravel()
    denom = k * (k - 1) / 2  # number of neighbor pairs, C(k, 2)
    # Diagonal of A^3 counts (twice) the triangles through each node.
    mat = adj.dot(adj).dot(adj)
    with np.errstate(divide="ignore", invalid="ignore"):
        # Nodes with k <= 1 divide by zero; nan_to_num maps them to 0.
        result = np.nan_to_num(0.5 * mat.diagonal() / denom)
    return {i: result[i] for i in range(len(H.v))}
-
-
-
def hypergraph_local_clustering_coefficient(H):
    """Compute the local clustering coefficient.

    This clustering coefficient is based on the
    overlap of the edges connected to a given node,
    normalized by the size of the node's neighborhood.

    Parameters
    ----------
    H : Hypergraph
        Hypergraph

    Returns
    -------
    dict
        keys are node IDs and values are the
        clustering coefficients.

    Notes
    -----
    The clustering coefficient is undefined when the number of
    neighbors is 0 or 1, but we set the clustering coefficient
    to 0 in these cases. For more discussion, see
    https://arxiv.org/abs/0802.2512

    See Also
    --------
    clustering_coefficient
    two_node_clustering_coefficient

    References
    ----------
    "Properties of metabolic graphs: biological organization or representation
    artifacts?" by Wanding Zhou and Luay Nakhleh.
    https://doi.org/10.1186/1471-2105-12-132

    "Hypergraphs for predicting essential genes using multiprotein complex data"
    by Florian Klimm, Charlotte M. Deane, and Gesine Reinert.
    https://doi.org/10.1093/comnet/cnaa028

    Example
    -------
    >>> import easygraph as eg
    >>> H = eg.random_hypergraph(3, [1, 1])
    >>> cc = eg.hypergraph_local_clustering_coefficient(H)
    >>> cc
    {0: 1.0, 1: 1.0, 2: 1.0}

    """
    result = {}
    # Which edges each node belongs to (set of edge indices per node).
    # NOTE(review): memberships is a list indexed positionally but later
    # read as memberships[n] for n in H.v — assumes node IDs are exactly
    # 0..num_v-1; verify for hypergraphs with other labels.
    memberships = []
    for n in H.v:
        tmp = set()
        for index, e in enumerate(H.e[0]):
            if n in e:
                tmp.add(index)
        memberships.append(tmp)

    # Which nodes each edge contains.
    members = H.e[0]
    for n in H.v:
        ev = memberships[n]
        dv = len(ev)  # number of edges containing n
        if dv <= 1:
            # Undefined for degree 0 or 1 — reported as 0 by convention.
            result[n] = 0
        else:
            total_eo = 0
            # go over all pairs of edges pairwise
            for e1 in range(dv):
                edge1 = members[e1]
                for e2 in range(e1):
                    edge2 = members[e2]
                    # set differences for the hyperedges
                    D1 = set(edge1) - set(edge2)
                    D2 = set(edge2) - set(edge1)
                    # if edges are the same by definition the extra overlap is zero
                    if len(D1.union(D2)) == 0:
                        eo = 0
                    else:
                        # otherwise we have to look at their neighbors
                        # the neighbors of D1 and D2, respectively.
                        neighD1 = {i for d in D1 for i in H.neighbor_of_node(d)}
                        neighD2 = {i for d in D2 for i in H.neighbor_of_node(d)}
                        # compute extra overlap [len() is used for cardinality of edges]
                        eo = (
                            len(neighD1.intersection(D2))
                            + len(neighD2.intersection(D1))
                        ) / len(
                            D1.union(D2)
                        )  # add it up
                    # add it up
                    total_eo = total_eo + eo

            # include normalization by degree k*(k-1)/2
            result[n] = 2 * total_eo / (dv * (dv - 1))
    return result
-
-
-
def hypergraph_two_node_clustering_coefficient(H, kind="union"):
    """Return the clustering coefficients for
    each node in a Hypergraph.

    This definition averages over all of the
    two-node clustering coefficients involving the node.

    Parameters
    ----------
    H : Hypergraph
        Hypergraph
    kind : string, optional
        The type of two node clustering coefficient. Options
        are "union", "max", and "min". By default, "union".

    Returns
    -------
    dict
        nodes are keys, clustering coefficients are values.

    Notes
    -----
    The clustering coefficient is undefined when the number of
    neighbors is 0 or 1, but we set the clustering coefficient
    to 0 in these cases. For more discussion, see
    https://arxiv.org/abs/0802.2512

    See Also
    --------
    clustering_coefficient
    local_clustering_coefficient

    References
    ----------
    "Clustering Coefficients in Protein Interaction Hypernetworks"
    by Suzanne Gallagher and Debra Goldberg.
    DOI: 10.1145/2506583.2506635

    Example
    -------
    >>> import easygraph as eg
    >>> H = eg.random_hypergraph(3, [1, 1])
    >>> cc = eg.two_node_clustering_coefficient(H, kind="union")
    >>> cc
    {0: 0.5, 1: 0.5, 2: 0.5}
    """
    # Edge-index sets: which hyperedges each node belongs to.
    memberships = {
        node: {idx for idx, edge in enumerate(H.e[0]) if node in edge}
        for node in H.v
    }

    result = {}
    for node in H.v:
        neighbors = H.neighbor_of_node(node)
        # Average the pairwise coefficient over all neighbors; nodes with
        # no neighbors keep the conventional value 0.
        total = 0.0
        for other in neighbors:
            total += _uv_cc(node, other, memberships, kind=kind) / len(neighbors)
        result[node] = total
    return result
-
-
-def_uv_cc(u,v,memberships,kind="union"):
-"""Helper function to compute the two-node
- clustering coefficient.
-
- Parameters
- ----------
- u : hashable
- First node
- v : hashable
- Second node
- memberships : dict
- node IDs are keys, edge IDs to which they belong
- are values.
- kind : str, optional
- Type of clustering coefficient to compute, by default "union".
- Options:
-
- - "union"
- - "max"
- - "min"
-
- Returns
- -------
- float
- The clustering coefficient
-
- Raises
- ------
- EasyGraphError
- If an invalid clustering coefficient kind
- is specified.
-
- References
- ----------
- "Clustering Coefficients in Protein Interaction Hypernetworks"
- by Suzanne Gallagher and Debra Goldberg.
- DOI: 10.1145/2506583.2506635
- """
- m_u=memberships[u]
- m_v=memberships[v]
-
- num=len(m_u.intersection(m_v))
-
- ifkind=="union":
- denom=len(m_u.union(m_v))
- elifkind=="min":
- denom=min(len(m_u),len(m_v))
- elifkind=="max":
- denom=max(len(m_u),len(m_v))
- else:
- raiseEasyGraphError("Invalid kind of clustering.")
-
- ifdenom==0:
- returnnp.nan
-
- returnnum/denom
-
def hypergraph_density(hg, ignore_singletons=False):
    r"""Hypergraph density.

    The density of a hypergraph is the number of existing edges divided by the number of
    possible edges.

    Let `H` have :math:`n` nodes and :math:`m` hyperedges. Then,

    * `density(H) =` :math:`\frac{m}{2^n - 1}`,
    * `density(H, ignore_singletons=True) =` :math:`\frac{m}{2^n - 1 - n}`.

    Here, :math:`2^n` is the total possible number of hyperedges on `H`, from which we
    subtract :math:`1` because the empty hyperedge is not considered. We subtract an
    additional :math:`n` when singletons are not considered.

    Parameters
    ----------
    hg : Hypergraph
        The hypergraph whose density is computed.

    ignore_singletons : bool, optional
        Whether to consider singleton edges. By default, False.

    Returns
    -------
    float
        The density; 0.0 when the hypergraph has no edges.

    Raises
    ------
    EasyGraphError
        If the hypergraph has no nodes.

    See Also
    --------
    :func:`incidence_density`

    """
    n = hg.num_v
    numer = len(hg.e[0])  # number of existing hyperedges
    if n < 1:
        raise EasyGraphError("Density not defined for empty hypergraph")
    if numer < 1:
        return 0.0

    # All non-empty subsets of the vertex set.
    denom = 2**n - 1
    if ignore_singletons:
        denom -= n
    try:
        return numer / float(denom)
    except ZeroDivisionError:
        # n == 1 with ignore_singletons leaves no possible edges.
        return 0.0
def empty_hypergraph(N=1):
    """Return a hypergraph with ``N`` nodes and no hyperedges.

    Parameters
    ----------
    N : int, optional
        Number of nodes in the hypergraph; by default 1.

    Returns
    -------
    eg.Hypergraph
        A hypergraph with ``N`` nodes and an empty edge set.

    """
    return eg.Hypergraph(N)
-
-
-
def complete_hypergraph(n, include_singleton=False):
    """Return the complete hypergraph on ``n`` nodes.

    Every possible hyperedge (vertex subset of size >= 2, or >= 1 when
    ``include_singleton`` is True) is present.

    Parameters
    ----------
    n : int
        Number of nodes; must be non-zero.
    include_singleton : bool, optional
        Whether to also add the single-node hyperedges. By default, False.

    Returns
    -------
    eg.Hypergraph
        The complete hypergraph.

    Raises
    ------
    EasyGraphError
        If ``n`` is zero.
    """
    if n == 0:
        raise EasyGraphError("The number of nodes in a Hypergraph can not be zero")
    hypergraph = eg.Hypergraph(n)
    if n > 1:
        smallest = 1 if include_singleton else 2
        # Collect every subset of each admissible size.
        all_edges = []
        for size in range(smallest, n + 1):
            all_edges.extend(itertools.combinations(range(n), size))
        hypergraph.add_hyperedges(all_edges)
    return hypergraph
Source code for easygraph.functions.hypergraph.null_model.lattice
-"""Generators for some lattice hypergraphs.
-
-All the functions in this module return a Hypergraph class (i.e. a simple, undirected
-hypergraph).
-
-"""
-
-fromwarningsimportwarn
-
-fromeasygraph.utils.exceptionimportEasyGraphError
-
-
-__all__=[
- "ring_lattice",
-]
-
-
-
def ring_lattice(n, d, k, l):
    """A ring lattice hypergraph.

    A d-uniform hypergraph on n nodes where each node is part of k edges and the
    overlap between consecutive edges is d-l.

    Parameters
    ----------
    n : int
        Number of nodes
    d : int
        Edge size
    k : int
        Number of edges of which a node is a part. Should be a multiple of 2.
    l : int
        Overlap between edges

    Returns
    -------
    Hypergraph
        The generated hypergraph

    Raises
    ------
    EasyGraphError
        If k is negative.

    Notes
    -----
    ring_lattice(n, 2, k, 0) is a ring lattice graph where each node has k//2 edges on
    either side.

    """
    from easygraph.classes.hypergraph import Hypergraph

    if k < 0:
        raise EasyGraphError("Invalid k value!")

    # k < 2 yields no connecting structure; k odd cannot be split evenly
    # around the ring — both are warned about rather than rejected.
    if k < 2:
        warn("This creates a completely disconnected hypergraph!")

    if k % 2 != 0:
        warn("k is not divisible by 2")

    # Each node starts k//2 edges; every edge contains the node plus d-1
    # consecutive vertices (mod n) offset by the overlap parameter l.
    edges = [
        [node] + [(start + l + i) % n for i in range(d - 1)]
        for node in range(n)
        for start in range(node + 1, node + k // 2 + 1)
    ]
    H = Hypergraph(num_v=n)
    H.add_hyperedges(edges)
    return H
def uniform_hypergraph_Gnp(k: int, num_v: int, prob: float, n_workers=None):
    r"""Return a random ``k``-uniform hypergraph with ``num_v`` vertices and probability ``prob`` of choosing a hyperedge.

    Args:
        ``num_v`` (``int``): The Number of vertices.
        ``k`` (``int``): The Number of vertices in each hyperedge.
        ``prob`` (``float``): Probability of choosing a hyperedge.
        ``n_workers`` (``int``, optional): Number of worker processes; when
        given, candidate hyperedges are sampled in parallel.

    Examples:
        >>> import easygraph as eg
        >>> hg = eg.random.uniform_hypergraph_Gnp(3, 5, 0.5)
        >>> hg.e
        ([(0, 1, 3), (0, 1, 4), (0, 2, 4), (1, 3, 4), (2, 3, 4)], [1.0, 1.0, 1.0, 1.0, 1.0])
    """
    # similar to BinomialRandomUniform in sagemath, https://doc.sagemath.org/html/en/reference/graphs/sage/graphs/hypergraph_generators.html

    assert num_v > 1, "num_v must be greater than 1"
    assert k > 1, "k must be greater than 1"
    assert 0 <= prob <= 1, "prob must be between 0 and 1"
    import random

    if n_workers is not None:
        # use the parallel version for large graph

        from functools import partial
        from multiprocessing import Pool

        # Partition all C(num_v, k) candidate edges across the workers;
        # each worker keeps a candidate with probability `prob`.
        edges = combinations(range(num_v), k)
        edges_parallel = split_edges(edges=list(edges), worker=n_workers)
        local_function = partial(uniform_hypergraph_Gnp_parallel, prob=prob)

        res_edges = []

        with Pool(n_workers) as p:
            ret = p.imap(local_function, edges_parallel)
            for res in ret:
                res_edges.extend(res)
        res_hypergraph = eg.Hypergraph(num_v=num_v, e_list=res_edges)
        return res_hypergraph

    else:
        # Sequential version: one Bernoulli(prob) trial per candidate edge.
        edges = combinations(range(num_v), k)
        edges = [e for e in edges if random.random() < prob]
        return eg.Hypergraph(num_v=num_v, e_list=edges)
-
-
-
def dcsbm_hypergraph(k1, k2, g1, g2, omega, seed=None):
    """A function to generate a Degree-Corrected Stochastic Block Model
    (DCSBM) hypergraph.

    Parameters
    ----------
    k1 : dict
        This is a dictionary where the keys are node ids
        and the values are node degrees.
    k2 : dict
        This is a dictionary where the keys are edge ids
        and the values are edge sizes.
    g1 : dict
        This a dictionary where the keys are node ids
        and the values are the group ids to which the node belongs.
        The keys must match the keys of k1.
    g2 : dict
        This a dictionary where the keys are edge ids
        and the values are the group ids to which the edge belongs.
        The keys must match the keys of k2.
    omega : 2D numpy array
        This is a matrix with entries which specify the number of edges
        between a given node community and edge community.
        The number of rows must match the number of node communities
        and the number of columns must match the number of edge
        communities.
    seed : int or None (default)
        Seed for the random number generator.

    Returns
    -------
    Hypergraph

    Warns
    -----
    warnings.warn
        If the sums of the edge sizes and node degrees are not equal, the
        algorithm still runs, but raises a warning.
        Also if the sum of the omega matrix does not match the sum of degrees,
        a warning is raised.

    Notes
    -----
    The sums of k1 and k2 should be the same. If they are not the same, this function
    returns a warning but still runs. The sum of k1 (and k2) and omega should be the
    same. If they are not the same, this function returns a warning but still runs and
    the number of entries in the incidence matrix is determined by the omega matrix.

    References
    ----------
    Implemented by Mirah Shi in HyperNetX and described for bipartite networks by
    Larremore et al. in https://doi.org/10.1103/PhysRevE.90.012805

    Examples
    --------
    >>> import easygraph as eg; import random; import numpy as np
    >>> n = 50
    >>> k1 = {i : random.randint(1, n) for i in range(n)}
    >>> k2 = {i : sorted(k1.values())[i] for i in range(n)}
    >>> g1 = {i : random.choice([0, 1]) for i in range(n)}
    >>> g2 = {i : random.choice([0, 1]) for i in range(n)}
    >>> omega = np.array([[n//2, 10], [10, n//2]])
    >>> H = eg.dcsbm_hypergraph(k1, k2, g1, g2, omega)

    """
    if seed is not None:
        random.seed(seed)

    # sort dictionary by degree in decreasing order
    node_labels = [n for n, _ in sorted(k1.items(), key=lambda d: d[1], reverse=True)]
    edge_labels = [m for m, _ in sorted(k2.items(), key=lambda d: d[1], reverse=True)]

    # Verify that the sum of node and edge degrees and the sum of node degrees and the
    # sum of community connection matrix differ by less than a single edge.
    if abs(sum(k1.values()) - sum(k2.values())) > 1:
        warnings.warn(
            "The sum of the degree sequence does not match the sum of the size sequence"
        )

    if abs(sum(k1.values()) - np.sum(omega)) > 1:
        warnings.warn(
            "The sum of the degree sequence does not "
            "match the entries in the omega matrix"
        )

    # get indices for each community
    community1_nodes = defaultdict(list)
    for label in node_labels:
        group = g1[label]
        community1_nodes[group].append(label)

    community2_nodes = defaultdict(list)
    for label in edge_labels:
        group = g2[label]
        community2_nodes[group].append(label)

    H = eg.Hypergraph(num_v=len(node_labels))

    # kappa1/kappa2: total degree (resp. size) mass per community, used to
    # normalize omega into per-pair connection probabilities.
    kappa1 = defaultdict(lambda: 0)
    kappa2 = defaultdict(lambda: 0)
    for id, g in g1.items():
        kappa1[g] += k1[id]
    for id, g in g2.items():
        kappa2[g] += k2[id]

    # tmp_hyperedges[v] collects the nodes of hyperedge v.
    # NOTE(review): indexing this list by the edge label v assumes edge ids
    # are consecutive integers starting at 0 -- confirm against callers.
    tmp_hyperedges = []
    for group1 in community1_nodes.keys():
        for group2 in community2_nodes.keys():
            # for each constant probability patch
            try:
                group_constant = omega[group1, group2] / (
                    kappa1[group1] * kappa2[group2]
                )
            except ZeroDivisionError:
                group_constant = 0

            for u in community1_nodes[group1]:
                j = 0
                v = community2_nodes[group2][j]  # start from beginning every time
                # max probability
                p = min(k1[u] * k2[v] * group_constant, 1)
                # Geometric skipping ("ball dropping"): jump ahead a
                # geometrically distributed number of candidate edges
                # instead of testing every (u, v) pair individually.
                while j < len(community2_nodes[group2]):
                    if p != 1:
                        r = random.random()
                        try:
                            j = j + math.floor(math.log(r) / math.log(1 - p))
                        except ZeroDivisionError:
                            # p == 0 makes log(1 - p) == 0: skip everything
                            j = np.inf
                    if j < len(community2_nodes[group2]):
                        v = community2_nodes[group2][j]
                        # rejection step corrects the skip distribution to
                        # the exact per-pair probability q
                        q = min((k1[u] * k2[v]) * group_constant, 1)
                        r = random.random()
                        if r < q / p:
                            # no duplicates
                            if v < len(tmp_hyperedges):
                                if u not in tmp_hyperedges[v]:
                                    tmp_hyperedges[v].append(u)
                            else:
                                tmp_hyperedges.append([u])

                        p = q
                        j = j + 1

    H.add_hyperedges(tmp_hyperedges)
    return H
-
-
-
def watts_strogatz_hypergraph(n, d, k, l, p, seed=None):
    """Generate a small-world hypergraph by rewiring a ring lattice.

    Parameters
    ----------
    n : int
        The number of nodes
    d : int
        Edge size
    k : int
        Number of edges of which a node is a part. Should be a multiple of 2.
    l : int
        Overlap between edges
    p : float
        The probability of rewiring each edge
    seed : int or None (default)
        Seed for numpy's random number generator.

    Returns
    -------
    Hypergraph
        The rewired hypergraph.
    """
    if seed is not None:
        np.random.seed(seed)
    H = ring_lattice(n, d, k, l)
    to_remove = []
    to_add = []
    # NOTE(review): this takes the edge list out of H.e[0], mutates it in
    # place, and rebuilds a fresh Hypergraph from it -- assumes H.e[0] is a
    # plain list of the current hyperedges; confirm against Hypergraph.e.
    H_edges = H.e[0]
    for e in H_edges:
        if np.random.random() < p:
            # Rewire: drop this edge and connect its smallest node to d - 1
            # randomly chosen nodes. np.random.choice samples with
            # replacement by default, so duplicates within the new edge are
            # possible -- presumably accepted; verify if uniformity matters.
            to_remove.append(e)
            node = min(e)
            neighbors = np.random.choice(H.v, size=d - 1)
            to_add.append(np.append(neighbors, node))

    for e in to_remove:
        # each selected edge is removed once, even if it appears repeatedly
        if e in H_edges:
            H_edges.remove(e)

    for e in to_add:
        H_edges.append(e)

    H = eg.Hypergraph(num_v=n, e_list=H_edges)
    return H
-
-
-
def chung_lu_hypergraph(k1, k2, seed=None):
    """A function to generate a Chung-Lu hypergraph

    Parameters
    ----------
    k1 : dict
        Dict where the keys are node ids
        and the values are node degrees.
    k2 : dict
        dict where the keys are edge ids
        and the values are edge sizes.
    seed : integer or None (default)
        The seed for the random number generator.

    Returns
    -------
    Hypergraph object
        The generated hypergraph

    Warns
    -----
    warnings.warn
        If the sums of the edge sizes and node degrees are not equal, the
        algorithm still runs, but raises a warning.

    Notes
    -----
    The sums of k1 and k2 should be the same. If they are not the same,
    this function returns a warning but still runs.

    References
    ----------
    Implemented by Mirah Shi in HyperNetX and described for
    bipartite networks by Aksoy et al. in https://doi.org/10.1093/comnet/cnx001

    Example
    -------
    >>> import easygraph as eg
    >>> import random
    >>> n = 100
    >>> k1 = {i : random.randint(1, 100) for i in range(n)}
    >>> k2 = {i : sorted(k1.values())[i] for i in range(n)}
    >>> H = eg.chung_lu_hypergraph(k1, k2)

    """
    if seed is not None:
        random.seed(seed)

    # sort dictionary by degree in decreasing order
    node_labels = [n for n, _ in sorted(k1.items(), key=lambda d: d[1], reverse=True)]
    edge_labels = [m for m, _ in sorted(k2.items(), key=lambda d: d[1], reverse=True)]

    m = len(k2)

    if sum(k1.values()) != sum(k2.values()):
        warnings.warn(
            "The sum of the degree sequence does not match the sum of the size sequence"
        )

    # S normalizes degree products into connection probabilities
    S = sum(k1.values())

    H = eg.Hypergraph(len(node_labels))

    # tmp_hyperedges[v] collects the nodes of hyperedge v.
    # NOTE(review): indexing by the edge label v assumes edge ids are
    # consecutive integers starting at 0 -- confirm against callers.
    tmp_hyperedges = []
    for u in node_labels:
        j = 0
        v = edge_labels[j]  # start from beginning every time
        p = min((k1[u] * k2[v]) / S, 1)

        # Geometric skipping over the edge list: jump ahead by a
        # geometrically distributed number of edges instead of testing
        # every (u, v) pair individually.
        while j < m:
            if p != 1:
                r = random.random()
                try:
                    j = j + math.floor(math.log(r) / math.log(1 - p))
                except ZeroDivisionError:
                    # p == 0 makes log(1 - p) == 0: skip all remaining edges
                    j = np.inf

            if j < m:
                v = edge_labels[j]
                q = min((k1[u] * k2[v]) / S, 1)
                r = random.random()
                if r < q / p:
                    # no duplicates
                    # NOTE(review): unlike dcsbm_hypergraph there is no
                    # `u not in tmp_hyperedges[v]` membership check here,
                    # so the same node can be appended to an edge twice --
                    # confirm whether that is intended.
                    if v < len(tmp_hyperedges):
                        tmp_hyperedges[v].append(u)
                    else:
                        tmp_hyperedges.append([u])
                p = q
                j = j + 1

    H.add_hyperedges(tmp_hyperedges)
    return H
-
-
-
def random_hypergraph(N, ps, order=None, seed=None):
    """Generates a random hypergraph

    Generate N nodes, and connect any d+1 nodes
    by a hyperedge with probability ps[d-1].

    Parameters
    ----------
    N : int
        Number of nodes
    ps : list of float
        List of probabilities (between 0 and 1) to create a
        hyperedge at each order d between any d+1 nodes. For example,
        ps[0] is the wiring probability of any edge (2 nodes), ps[1]
        of any triangles (3 nodes).
    order: int of None (default)
        If None, ignore. If int, generates a uniform hypergraph with edges
        of order `order` (ps must have only one element).
    seed : integer or None (default)
        Seed for the random number generator.

    Returns
    -------
    Hypergraph object
        The generated hypergraph

    References
    ----------
    Described as 'random hypergraph' by M. Dewar et al. in https://arxiv.org/abs/1703.07686

    Example
    -------
    >>> import easygraph as eg
    >>> H = eg.random_hypergraph(50, [0.1, 0.01])

    """
    if seed is not None:
        np.random.seed(seed)

    if order is not None and len(ps) != 1:
        raise EasyGraphError("ps must contain a single element if order is an int")

    if (np.any(np.array(ps) < 0)) or (np.any(np.array(ps) > 1)):
        raise EasyGraphError("All elements of ps must be between 0 and 1 included.")

    nodes = range(N)
    hyperedges = []

    for i, p in enumerate(ps):
        # d is the order of the edges drawn at this probability level
        d = order if order is not None else i + 1

        # Draw one Bernoulli(p) per possible (d+1)-subset and keep the
        # subsets whose draw succeeded.
        candidates = combinations(nodes, d + 1)
        n_candidates = comb(N, d + 1, exact=True)
        keep = np.random.random(size=n_candidates) <= p

        hyperedges += [e for e, kept in zip(candidates, keep) if kept]

    H = eg.Hypergraph(num_v=N)
    H.add_hyperedges(hyperedges)

    return H
def star_clique(n_star, n_clique, d_max):
    """Generate a star-clique structure

    That is a star network and a clique network,
    connected by one pairwise edge connecting the centre of the star to the
    clique. Each clique is promoted to a hyperedge up to order d_max.

    Parameters
    ----------
    n_star : int
        Number of legs of the star
    n_clique : int
        Number of nodes in the clique
    d_max : int
        Maximum order up to which to promote
        cliques to hyperedges

    Returns
    -------
    H : Hypergraph

    Examples
    --------
    >>> import easygraph as eg
    >>> H = eg.star_clique(6, 7, 2)

    Notes
    -----
    The total number of nodes is n_star + n_clique.

    """
    if n_star <= 0:
        raise ValueError("n_star must be an integer > 0.")
    if n_clique <= 0:
        raise ValueError("n_clique must be an integer > 0.")
    if d_max < 0:
        raise ValueError("d_max must be an integer >= 0.")
    elif d_max > n_clique - 1:
        raise ValueError("d_max must be <= n_clique - 1.")

    star_nodes = list(range(n_star))
    clique_nodes = list(range(n_star, n_star + n_clique))

    H = eg.Hypergraph(num_v=n_star + n_clique)

    # star edges: the 0-th node is the centre, connected to every leg
    center = star_nodes[0]
    H.add_hyperedges([[center, leg] for leg in star_nodes[1:]])

    # one pairwise edge connecting the star centre to the clique
    H.add_hyperedges([center, clique_nodes[0]])

    # promote the clique to hyperedges of every size 2 .. d_max + 1
    clique_edges = []
    for size in range(2, d_max + 2):
        clique_edges.extend(combinations(clique_nodes, size))
    H.add_hyperedges(clique_edges)

    return H
def uniform_hypergraph_Gnm(k: int, num_v: int, num_e: int, n_workers=None):
    r"""Return a random ``k``-uniform hypergraph with ``num_v`` vertices and ``num_e`` hyperedges.

    Args:
        ``k`` (``int``): The Number of vertices in each hyperedge.
        ``num_v`` (``int``): The Number of vertices.
        ``num_e`` (``int``): The Number of hyperedges.
        ``n_workers`` (``int``, optional): If given, sample the edges in a
            multiprocessing pool, then top up serially until ``num_e``
            distinct edges exist.

    Examples:
        >>> import easygraph as eg
        >>> hg = eg.uniform_hypergraph_Gnm(3, 5, 4)
        >>> hg.e
        ([(0, 1, 2), (0, 1, 3), (0, 3, 4), (2, 3, 4)], [1.0, 1.0, 1.0, 1.0])
    """
    # similar to UniformRandomUniform in sagemath,
    # https://doc.sagemath.org/html/en/reference/graphs/sage/graphs/hypergraph_generators.html
    assert k > 1, "k must be greater than 1"  # TODO ?
    assert num_v > 1, "num_v must be greater than 1"
    assert num_e > 0, "num_e must be greater than 0"

    if n_workers is not None:
        # use the parallel version for large graph
        # (removed a dead `edges = set()` and an unused `import time` here)
        from functools import partial
        from multiprocessing import Pool

        edges_parallel = split_num_e(num_e=num_e, worker=n_workers)
        local_function = partial(uniform_hypergraph_Gnm_parallel, num_v=num_v, k=k)

        res_edges = set()
        with Pool(n_workers) as p:
            ret = p.imap(local_function, edges_parallel)
            for res in ret:
                for r in res:
                    res_edges.add(r)

        # workers may have produced duplicate edges across chunks; keep
        # sampling serially until num_e distinct edges exist
        while len(res_edges) < num_e:
            e = random.sample(range(num_v), k)
            e = tuple(sorted(e))
            if e not in res_edges:
                res_edges.add(e)

        return eg.Hypergraph(num_v=num_v, e_list=list(res_edges))

    else:
        # rejection-sample distinct sorted k-subsets until num_e are found
        edges = set()
        while len(edges) < num_e:
            e = random.sample(range(num_v), k)
            e = tuple(sorted(e))
            if e not in edges:
                edges.add(e)

        return eg.Hypergraph(num_v, list(edges))
-
-
-
def uniform_hypergraph_configuration_model(k, m, seed=None):
    """
    A function to generate an m-uniform configuration model

    Parameters
    ----------
    k : dictionary
        This is a dictionary where the keys are node ids
        and the values are node degrees.
    m : int
        specifies the hyperedge size
    seed : integer or None (default)
        The seed for the random number generator

    Returns
    -------
    Hypergraph object
        The generated hypergraph

    Warns
    -----
    warnings.warn
        If the sums of the degrees are not divisible by m, the
        algorithm still runs, but raises a warning and adds an
        additional connection to random nodes to satisfy this
        condition.

    Notes
    -----
    This algorithm normally creates multi-edges and loopy hyperedges.
    We remove the loopy hyperedges.

    References
    ----------
    "The effect of heterogeneity on hypergraph contagion models"
    by Nicholas W. Landry and Juan G. Restrepo
    https://doi.org/10.1063/5.0020034


    Example
    -------
    >>> import easygraph as eg
    >>> import random
    >>> n = 1000
    >>> m = 3
    >>> k = {1: 1, 2: 2, 3: 3, 4: 3}
    >>> H = eg.uniform_hypergraph_configuration_model(k, m)

    """
    if seed is not None:
        random.seed(seed)

    # Making sure we have the right number of stubs
    remainder = sum(k.values()) % m
    if remainder != 0:
        warnings.warn(
            "This degree sequence is not realizable. "
            "Increasing the degree of random nodes so that it is."
        )
        # bump m - remainder random nodes by one stub each so the total
        # stub count becomes divisible by m
        # NOTE: this mutates the caller's dict `k` in place
        random_ids = random.sample(list(k.keys()), int(round(m - remainder)))
        for id in random_ids:
            k[id] = k[id] + 1

    stubs = []
    # Creating the list to index through
    for id in k:
        stubs.extend([id] * int(k[id]))

    H = eg.Hypergraph(num_v=len(k))

    # Repeatedly draw m distinct stub positions; the node ids they hold form
    # a candidate hyperedge. Candidates with repeated nodes (loopy
    # hyperedges) are discarded, but their stubs are still consumed.
    while len(stubs) != 0:
        u = random.sample(range(len(stubs)), m)
        edge = set()
        for index in u:
            edge.add(stubs[index])
        if len(edge) == m:
            # assumes add_hyperedges accepts a flat node list as a single
            # edge -- confirm against Hypergraph.add_hyperedges
            H.add_hyperedges(list(edge))

        # delete from the back so earlier indices stay valid
        for index in sorted(u, reverse=True):
            del stubs[index]

    return H
-
-
-
def uniform_HSBM(n, m, p, sizes, seed=None):
    """Create a uniform hypergraph stochastic block model (HSBM).

    Parameters
    ----------
    n : int
        The number of nodes
    m : int
        The hyperedge size
    p : m-dimensional numpy array
        tensor of probabilities between communities
    sizes : list or 1D numpy array
        The sizes of the community blocks in order
    seed : integer or None (default)
        The seed for the random number generator

    Returns
    -------
    Hypergraph
        The constructed SBM hypergraph

    Raises
    ------
    EasyGraphError
        - If the length of sizes and p do not match.
        - If p is not a tensor with every dimension equal
        - If p is not m-dimensional
        - If the entries of p are not in the range [0, 1]
        - If the sum of the vector of sizes does not equal the number of nodes.
    Exception
        If there is an integer overflow error

    See Also
    --------
    uniform_HPPM

    References
    ----------
    Nicholas W. Landry and Juan G. Restrepo.
    "Polarization in hypergraphs with community structure."
    Preprint, 2023. https://doi.org/10.48550/arXiv.2302.13967
    """
    # Check if dimensions match
    if len(sizes) != np.size(p, axis=0):
        raise EasyGraphError("'sizes' and 'p' do not match.")
    if len(np.shape(p)) != m:
        raise EasyGraphError("The dimension of p does not match m")
    # Check that p has the same length over every dimension.
    if len(set(np.shape(p))) != 1:
        raise EasyGraphError("'p' must be a square tensor.")
    if np.max(p) > 1 or np.min(p) < 0:
        raise EasyGraphError("Entries of 'p' not in [0,1].")
    if np.sum(sizes) != n:
        raise EasyGraphError("Sum of sizes does not match n")

    if seed is not None:
        np.random.seed(seed)

    node_labels = range(n)
    H = eg.Hypergraph(num_v=n)

    block_range = range(len(sizes))
    # Split node labels in a partition (list of sets).
    size_cumsum = [sum(sizes[0:x]) for x in range(0, len(sizes) + 1)]
    partition = [
        list(node_labels[size_cumsum[x] : size_cumsum[x + 1]])
        for x in range(0, len(size_cumsum) - 1)
    ]

    for block in itertools.product(block_range, repeat=m):
        if p[block] == 1:  # Test edges cases p_ij = 0 or 1
            # BUGFIX: the original passed a single generator to
            # itertools.product (which yields 1-tuples of whole partition
            # lists) and iterated block_range instead of the block's own
            # community indices. Enumerate every node combination drawn
            # from the communities named by `block`, mirroring the
            # p[block] > 0 branch below (including the degenerate-edge
            # check for repeated nodes).
            edges = itertools.product(*(partition[i] for i in block))
            for e in edges:
                e = set(e)
                if len(e) == m:
                    H.add_hyperedges(list(e))
        elif p[block] > 0:
            partition_sizes = [len(partition[i]) for i in block]
            max_index = reduce(operator.mul, partition_sizes, 1)
            if max_index < 0:
                raise Exception("Index overflow error!")
            # geometric skipping over the flattened index space of all
            # candidate edges in this block
            index = np.random.geometric(p[block]) - 1

            while index < max_index:
                indices = _index_to_edge_partition(index, partition_sizes, m)
                e = {partition[block[i]][indices[i]] for i in range(m)}
                if len(e) == m:  # discard edges with repeated nodes
                    H.add_hyperedges(list(e))
                index += np.random.geometric(p[block])
    return H
-
-
-
def uniform_HPPM(n, m, rho, k, epsilon, seed=None):
    """Construct the m-uniform hypergraph planted partition model (m-HPPM)

    Parameters
    ----------
    n : int > 0
        Number of nodes
    m : int > 0
        Hyperedge size
    rho : float between 0 and 1
        The fraction of nodes in community 1
    k : float > 0
        Mean degree
    epsilon : float > 0
        Imbalance parameter
    seed : integer or None (default)
        The seed for the random number generator

    Returns
    -------
    Hypergraph
        The constructed m-HPPM hypergraph.

    Raises
    ------
    EasyGraphError
        - If rho is not between 0 and 1
        - If the mean degree is negative.
        - If epsilon is not between 0 and 1

    See Also
    --------
    uniform_HSBM

    References
    ----------
    Nicholas W. Landry and Juan G. Restrepo.
    "Polarization in hypergraphs with community structure."
    Preprint, 2023. https://doi.org/10.48550/arXiv.2302.13967
    """
    if rho < 0 or rho > 1:
        raise EasyGraphError("The value of rho must be between 0 and 1")
    if k < 0:
        raise EasyGraphError("The mean degree must be non-negative")
    if epsilon < 0 or epsilon > 1:
        raise EasyGraphError("epsilon must be between 0 and 1")

    # two communities: a rho-fraction and the remainder
    first_size = int(rho * n)
    sizes = [first_size, n - first_size]

    # baseline edge probability yielding mean degree k
    base_p = k / (m * n ** (m - 1))
    # ratio of inter- to intra-community edges
    q = rho**m + (1 - rho) ** m
    r = 1 / q - 1
    p_in = (1 + r * epsilon) * base_p
    p_out = (1 - epsilon) * base_p

    # probability tensor: p_out everywhere, p_in on the two diagonal corners
    tensor = p_out * np.ones([2] * m)
    tensor[tuple([0] * m)] = p_in
    tensor[tuple([1] * m)] = p_in

    return uniform_HSBM(n, m, tensor, sizes, seed=seed)
-
-
-
def uniform_erdos_renyi_hypergraph(n, m, p, p_type="degree", seed=None):
    """Generate an m-uniform Erdős–Rényi hypergraph

    This creates a hypergraph with `n` nodes where
    hyperedges of size `m` are created at random to
    obtain a mean degree of `k`.

    Parameters
    ----------
    n : int > 0
        Number of nodes
    m : int > 0
        Hyperedge size
    p : float or int > 0
        Mean expected degree if p_type="degree" and
        probability of an m-hyperedge if p_type="prob"
    p_type : str
        "degree" or "prob", by default "degree"
    seed : integer or None (default)
        The seed for the random number generator

    Returns
    -------
    Hypergraph
        The Erdos Renyi hypergraph


    See Also
    --------
    random_hypergraph
    """
    if seed is not None:
        np.random.seed(seed)

    H = eg.Hypergraph(num_v=n)

    if p_type == "degree":
        q = p / (m * n ** (m - 1))  # wiring probability
    elif p_type == "prob":
        q = p
    else:
        raise EasyGraphError("Invalid p_type!")

    if q > 1 or q < 0:
        raise EasyGraphError("Probability not in [0,1].")

    # Geometric skipping over the n**m candidate index space: each jump
    # lands on the next kept candidate edge.
    index = np.random.geometric(q) - 1  # -1 b/c zero indexing
    max_index = n**m
    while index < max_index:
        candidate = set(_index_to_edge(index, n, m))
        # only keep candidates whose m positions name m distinct nodes
        if len(candidate) == m:
            H.add_hyperedges(list(candidate))
        index += np.random.geometric(q)
    return H
-
-
-def_index_to_edge(index,n,m):
-"""Generate a hyperedge given an index in the list of possible edges.
-
- Parameters
- ----------
- index : int > 0
- The index of the hyperedge in the list of all possible hyperedges.
- n : int > 0
- The number of nodes
- m : int > 0
- The hyperedge size.
-
- Returns
- -------
- list
- The reconstructed hyperedge
-
- See Also
- --------
- _index_to_edge_partition
-
- References
- ----------
- https://stackoverflow.com/questions/53834707/element-at-index-in-itertools-product
- """
- return[(index//(n**r)%n)forrinrange(m-1,-1,-1)]
-
-
-def_index_to_edge_partition(index,partition_sizes,m):
-"""Generate a hyperedge given an index in the list of possible edges
- and a partition of community labels.
-
- Parameters
- ----------
- index : int > 0
- The index of the hyperedge in the list of all possible hyperedges.
- n : int > 0
- The number of nodes
- m : int > 0
- The hyperedge size.
-
- Returns
- -------
- list
- The reconstructed hyperedge
-
- See Also
- --------
- _index_to_edge
-
- """
- try:
- return[
- int(index//np.prod(partition_sizes[r+1:])%partition_sizes[r])
- forrinrange(m)
- ]
- exceptKeyError:
- raiseException("Invalid parameters")
-
def is_isolate(G, n):
    """Determines whether a node is an isolate.

    An *isolate* is a node with no neighbors (that is, with degree
    zero). For directed graphs, this means no in-neighbors and no
    out-neighbors.

    Parameters
    ----------
    G : EasyGraph graph

    n : node
        A node in `G`.

    Returns
    -------
    is_isolate : bool
        True if and only if `n` has no neighbors.

    Examples
    --------
    >>> G = eg.Graph()
    >>> G.add_edge(1, 2)
    >>> G.add_node(3)
    >>> eg.is_isolate(G, 2)
    False
    >>> eg.is_isolate(G, 3)
    True
    """
    degrees = G.degree()
    return degrees[n] == 0
-
-
-
def isolates(G):
    """Iterator over isolates in the graph.

    An *isolate* is a node with no neighbors (that is, with degree
    zero). For directed graphs, this means no in-neighbors and no
    out-neighbors.

    Parameters
    ----------
    G : EasyGraph graph

    Returns
    -------
    iterator
        An iterator over the isolates of `G`.

    Examples
    --------
    To get a list of all isolates of a graph, use the :class:`list`
    constructor::

        >>> G = eg.Graph()
        >>> G.add_edge(1, 2)
        >>> G.add_node(3)
        >>> list(eg.isolates(G))
        [3]

    To remove all isolates in the graph, first create a list of the
    isolates, then use :meth:`Graph.remove_nodes_from`::

        >>> G.remove_nodes_from(list(eg.isolates(G)))
        >>> list(G)
        [1, 2]

    For digraphs, isolates have zero in-degree and zero out-degree::

        >>> G = eg.DiGraph([(0, 1), (1, 2)])
        >>> G.add_node(3)
        >>> list(eg.isolates(G))
        [3]

    """
    degrees = G.degree()
    return (node for node, deg in degrees.items() if deg == 0)
-
-
-
def number_of_isolates(G):
    """Returns the number of isolates in the graph.

    An *isolate* is a node with no neighbors (that is, with degree
    zero). For directed graphs, this means no in-neighbors and no
    out-neighbors.

    Parameters
    ----------
    G : EasyGraph graph

    Returns
    -------
    int
        The number of degree zero nodes in the graph `G`.

    """
    # TODO This can be parallelized.
    count = 0
    for _ in isolates(G):
        count += 1
    return count
@not_implemented_for("multigraph")
@only_implemented_for_UnDirected_graph
def bridges(G, root=None):
    """Generate all bridges in a graph.

    A *bridge* in a graph is an edge whose removal causes the number of
    connected components of the graph to increase. Equivalently, a bridge is an
    edge that does not belong to any cycle.

    Parameters
    ----------
    G : undirected graph

    root : node (optional)
        A node in the graph `G`. If specified, only the bridges in the
        connected component containing this node will be returned.

    Yields
    ------
    e : edge
        An edge in the graph whose removal disconnects the graph (or
        causes the number of connected components to increase).

    Raises
    ------
    NodeNotFound
        If `root` is not in the graph `G`.

    Examples
    --------

    >>> list(eg.bridges(G))
    [(9, 10)]

    Notes
    -----
    This is an implementation of the algorithm described in _[1]. An edge is a
    bridge if and only if it is not contained in any chain. Chains are found
    using the :func:`chain_decomposition` function.

    Ignoring polylogarithmic factors, the worst-case time complexity is the
    same as the :func:`chain_decomposition` function,
    $O(m + n)$, where $n$ is the number of nodes in the graph and $m$ is
    the number of edges.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Bridge_%28graph_theory%29#Bridge-Finding_with_Chain_Decompositions
    """
    # Every edge covered by some chain lies on a cycle; the rest are bridges.
    covered = set(chain.from_iterable(chain_decomposition(G, root=root)))
    for u, v, _ in G.edges:
        if (u, v) in covered or (v, u) in covered:
            continue
        yield u, v
-
-
-
@not_implemented_for("multigraph")
@only_implemented_for_UnDirected_graph
def has_bridges(G, root=None):
    """Decide whether a graph has any bridges.

    A *bridge* in a graph is an edge whose removal causes the number of
    connected components of the graph to increase.

    Parameters
    ----------
    G : undirected graph

    root : node (optional)
        A node in the graph `G`. If specified, only the bridges in the
        connected component containing this node will be considered.

    Returns
    -------
    bool
        Whether the graph (or the connected component containing `root`)
        has any bridges.

    Raises
    ------
    NodeNotFound
        If `root` is not in the graph `G`.

    Examples
    --------

    >>> eg.has_bridges(G)
    True

    Notes
    -----
    This implementation uses the :func:`easygraph.bridges` function, so
    it shares its worst-case time complexity, $O(m + n)$, ignoring
    polylogarithmic factors, where $n$ is the number of nodes in the
    graph and $m$ is the number of edges.

    """
    try:
        # BUGFIX: forward `root` so that only the component containing it is
        # considered, as documented (the original called bridges(G) and
        # silently ignored the root argument).
        next(bridges(G, root=root))
    except StopIteration:
        return False
    else:
        return True
-
-
def chain_decomposition(G, root=None):
    """Yield the chains of a chain decomposition of ``G``.

    Each chain is a list of ``(u, v)`` edges beginning with one nontree
    (back) edge of a DFS forest and continuing along tree edges up toward
    the root until an already-visited node is reached. Edges not covered by
    any chain are exactly the bridges of ``G`` (see :func:`bridges`).

    Parameters
    ----------
    G : undirected graph
    root : node, optional
        Start the DFS at this node; only that connected component is
        decomposed.

    Yields
    ------
    list of (u, v) tuples
        One chain per nontree edge, in DFS visitation order.
    """

    def _dfs_cycle_forest(G, root=None):
        # Build a directed graph H whose tree edges point from child to
        # parent (nontree=False), plus the DFS back edges (nontree=True),
        # and record the nodes in DFS visitation order.
        H = eg.DiGraph()
        nodes = []
        for u, v, d in dfs_labeled_edges(G, source=root):
            if d == "forward":
                # `dfs_labeled_edges()` yields (root, root, 'forward')
                # if it is beginning the search on a new connected
                # component.
                if u == v:
                    H.add_node(v, parent=None)
                    nodes.append(v)
                else:
                    H.add_node(v, parent=u)
                    H.add_edge(v, u, nontree=False)
                    nodes.append(v)
            # `dfs_labeled_edges` considers nontree edges in both
            # orientations, so we need to not add the edge if it its
            # other orientation has been added.
            elif d == "nontree" and v not in H[u]:
                H.add_edge(v, u, nontree=True)
            else:
                # Do nothing on 'reverse' edges; we only care about
                # forward and nontree edges.
                pass
        return H, nodes

    def _build_chain(G, u, v, visited):
        # Walk parent pointers from v until an already-visited node is hit,
        # yielding each traversed edge and marking its target visited.
        while v not in visited:
            yield u, v
            visited.add(v)
            u, v = v, G.nodes[v]["parent"]
        yield u, v

    H, nodes = _dfs_cycle_forest(G, root)

    visited = set()
    for u in nodes:
        visited.add(u)
        # For each nontree edge going out of node u...
        edges = []
        for w, v, d in H.edges:
            if w == u and d["nontree"] == True:
                edges.append((w, v))
        for u, v in edges:
            # Create the cycle or cycle prefix starting with the
            # nontree edge.
            chain = list(_build_chain(H, u, v, visited))
            yield chain
-
-
def dfs_labeled_edges(G, source=None, depth_limit=None):
    """Iterate over edges of a depth-first search, labeled by type.

    Yields triples ``(u, v, label)``: ``"forward"`` for tree edges (and the
    ``(start, start, "forward")`` sentinel opening each component),
    ``"nontree"`` for edges to already-visited nodes, and ``"reverse"`` when
    the search retreats along a tree edge (with a closing
    ``(start, start, "reverse")`` per component).
    """
    # With no source, search every component; otherwise only the one
    # containing `source`.
    roots = G if source is None else [source]
    if depth_limit is None:
        depth_limit = len(G)

    seen = set()
    for root in roots:
        if root in seen:
            continue
        yield root, root, "forward"
        seen.add(root)
        frontier = [(root, depth_limit, iter(G[root]))]
        while frontier:
            node, budget, nbrs = frontier[-1]
            exhausted = False
            try:
                nxt = next(nbrs)
            except StopIteration:
                exhausted = True
            if exhausted:
                frontier.pop()
                if frontier:
                    yield frontier[-1][0], node, "reverse"
                continue
            if nxt in seen:
                yield node, nxt, "nontree"
            else:
                yield node, nxt, "forward"
                seen.add(nxt)
                if budget > 1:
                    frontier.append((nxt, budget - 1, iter(G[nxt])))
        yield root, root, "reverse"
-
def eccentricity(G, v=None, sp=None):
    """Returns the eccentricity of nodes in G.

    The eccentricity of a node v is the maximum distance from v to
    all other nodes in G.

    Parameters
    ----------
    G : EasyGraph graph
        A graph

    v : node, optional
        Return value of specified node

    sp : dict of dicts, optional
        All pairs shortest path lengths as a dictionary of dictionaries

    Returns
    -------
    ecc : dictionary
        A dictionary of eccentricity values keyed by node.

    Examples
    --------
    >>> G = eg.Graph([(1, 2), (1, 3), (1, 4), (3, 4), (3, 5), (4, 5)])
    >>> dict(eg.eccentricity(G))
    {1: 2, 2: 3, 3: 2, 4: 2, 5: 3}

    >>> dict(eg.eccentricity(G, v=[1, 5]))  # This returns the eccentrity of node 1 & 5
    {1: 2, 5: 3}

    """
    # Total node count; a node reaching fewer than `order` nodes has an
    # unreachable node, i.e. infinite eccentricity.
    order = G.order()

    e = {}
    for n in G.nbunch_iter(v):
        if sp is None:
            # assumes single_source_dijkstra returns a dict of shortest
            # path lengths keyed by reachable target node -- TODO confirm
            length = eg.single_source_dijkstra(G, n)
            L = len(length)
        else:
            try:
                length = sp[n]
                L = len(length)
            except TypeError as err:
                raise eg.EasyGraphError('Format of "sp" is invalid.') from err
        if L != order:
            if G.is_directed():
                msg = (
                    "Found infinite path length because the digraph is not"
                    " strongly connected"
                )
            else:
                msg = "Found infinite path length because the graph is not connected"
            raise eg.EasyGraphError(msg)

        e[n] = max(length.values())

    # NOTE(review): when `v` is a single node contained in G, only that
    # node's eccentricity is returned. When `v` is None or a container of
    # nodes, `v in G` is presumably falsy and the whole dict is returned --
    # confirm G.__contains__ handles None/containers without raising.
    if v in G:
        return e[v]  # return single value
    else:
        return e
-
-
-
def diameter(G, e=None):
    """Returns the diameter of the graph G.

    The diameter is the maximum eccentricity.

    Parameters
    ----------
    G : EasyGraph graph
        A graph

    e : eccentricity dictionary, optional
        A precomputed dictionary of eccentricities.

    Returns
    -------
    d : integer
        Diameter of graph

    Examples
    --------
    >>> G = eg.Graph([(1, 2), (1, 3), (1, 4), (3, 4), (3, 5), (4, 5)])
    >>> eg.diameter(G)
    3

    See Also
    --------
    eccentricity
    """
    ecc = eccentricity(G) if e is None else e
    return max(ecc.values())
-fromheapqimportheappop
-fromheapqimportheappush
-fromitertoolsimportcount
-frommathimportisnan
-fromoperatorimportitemgetter
-
-fromeasygraph.utils.decoratorsimport*
-
-
-__all__=[
- "minimum_spanning_edges",
- "maximum_spanning_edges",
- "minimum_spanning_tree",
- "maximum_spanning_tree",
-]
-
-
def boruvka_mst_edges(G, minimum=True, weight="weight", data=True, ignore_nan=False):
    """Iterate over edges of a Borůvka's algorithm min/max spanning tree.

    Parameters
    ----------
    G : EasyGraph Graph
        The edges of `G` must have distinct weights,
        otherwise the edges may not form a tree.

    minimum : bool (default: True)
        Find the minimum (True) or maximum (False) spanning tree.

    weight : string (default: 'weight')
        The name of the edge attribute holding the edge weights.

    data : bool (default: True)
        Flag for whether to yield edge attribute dicts.
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
        If False, yield edges `(u, v)`.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Yields
    ------
    Spanning-tree (or forest) edges, as ``(u, v, d)`` or ``(u, v)``
    depending on `data`.
    """
    # Initialize a forest, assuming initially that it is the discrete
    # partition of the nodes of the graph.
    forest = UnionFind(G)

    def best_edge(component):
        """Returns the optimum (minimum or maximum) edge on the edge
        boundary of the given set of nodes.

        A return value of ``None`` indicates an empty boundary.

        """
        # Maximization is implemented by minimizing negated weights.
        sign = 1 if minimum else -1
        minwt = float("inf")
        boundary = None
        for e in edge_boundary(G, component, data=True):
            # missing weight attributes default to 1
            wt = e[-1].get(weight, 1) * sign
            if isnan(wt):
                if ignore_nan:
                    continue
                msg = f"NaN found as an edge weight. Edge {e}"
                raise ValueError(msg)
            if wt < minwt:
                minwt = wt
                boundary = e
        return boundary

    # Determine the optimum edge in the edge boundary of each component
    # in the forest.
    best_edges = (best_edge(component) for component in forest.to_sets())
    best_edges = [edge for edge in best_edges if edge is not None]
    # If each entry was ``None``, that means the graph was disconnected,
    # so we are done generating the forest.
    while best_edges:
        # Determine the optimum edge in the edge boundary of each
        # component in the forest.
        #
        # This must be a sequence, not an iterator. In this list, the
        # same edge may appear twice, in different orientations (but
        # that's okay, since a union operation will be called on the
        # endpoints the first time it is seen, but not the second time).
        #
        # Any ``None`` indicates that the edge boundary for that
        # component was empty, so that part of the forest has been
        # completed.
        #
        # TODO This can be parallelized, both in the outer loop over
        # each component in the forest and in the computation of the
        # minimum. (Same goes for the identical lines outside the loop.)
        best_edges = (best_edge(component) for component in forest.to_sets())
        best_edges = [edge for edge in best_edges if edge is not None]
        # Join trees in the forest using the best edges, and yield that
        # edge, since it is part of the spanning tree.
        #
        # TODO This loop can be parallelized, to an extent (the union
        # operation must be atomic).
        for u, v, d in best_edges:
            # Skip edges whose endpoints were already merged earlier in
            # this same pass (the same boundary edge can appear twice).
            if forest[u] != forest[v]:
                if data:
                    yield u, v, d
                else:
                    yield u, v
                forest.union(u, v)
-
-
@hybrid("cpp_kruskal_mst_edges")
def kruskal_mst_edges(G, minimum=True, weight="weight", data=True, ignore_nan=False):
    """Iterate over edges of a Kruskal's algorithm min/max spanning tree.

    Parameters
    ----------
    G : EasyGraph Graph
        The graph holding the tree of interest.

    minimum : bool (default: True)
        Find the minimum (True) or maximum (False) spanning tree.

    weight : string (default: 'weight')
        The name of the edge attribute holding the edge weights.

    data : bool (default: True)
        Flag for whether to yield edge attribute dicts.
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
        If False, yield edges `(u, v)`.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    """
    subtrees = UnionFind()
    all_edges = [(u, v, t) for u, v, t in G.edges]

    def signed_weighted_edges():
        # Maximization is implemented by negating the weights before sort.
        sign = 1 if minimum else -1
        for u, v, d in all_edges:
            wt = d.get(weight, 1) * sign
            if isnan(wt):
                if ignore_nan:
                    continue
                raise ValueError(f"NaN found as an edge weight. Edge {(u,v,d)}")
            yield wt, u, v, d

    for wt, u, v, d in sorted(signed_weighted_edges(), key=itemgetter(0)):
        # Keep an edge only if it joins two distinct subtrees.
        if subtrees[u] != subtrees[v]:
            yield (u, v, d) if data else (u, v)
            subtrees.union(u, v)
-
-
@hybrid("cpp_prim_mst_edges")
def prim_mst_edges(G, minimum=True, weight="weight", data=True, ignore_nan=False):
    """Iterate over edges of Prim's algorithm min/max spanning tree.

    Parameters
    ----------
    G : EasyGraph Graph
        The graph holding the tree of interest.

    minimum : bool (default: True)
        Find the minimum (True) or maximum (False) spanning tree.

    weight : string (default: 'weight')
        The name of the edge attribute holding the edge weights.

    data : bool (default: True)
        Flag for whether to yield edge attribute dicts.
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
        If False, yield edges `(u, v)`.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    """
    # Maximization is implemented by negating the weights.
    sign = 1 if minimum else -1
    remaining = set(G)
    tiebreak = count()  # breaks weight ties so dicts are never compared

    while remaining:
        # Start a new tree from an arbitrary remaining node; the outer
        # loop yields a spanning forest for disconnected graphs.
        start = remaining.pop()
        visited = {start}
        frontier = []
        for nbr, attrs in G.adj[start].items():
            wt = attrs.get(weight, 1) * sign
            if isnan(wt):
                if ignore_nan:
                    continue
                raise ValueError(f"NaN found as an edge weight. Edge {(start,nbr,attrs)}")
            heappush(frontier, (wt, next(tiebreak), start, nbr, attrs))
        while frontier:
            _, _, u, v, d = heappop(frontier)
            if v in visited or v not in remaining:
                continue
            yield (u, v, d) if data else (u, v)
            # Grow the tree across the chosen edge and extend the frontier.
            visited.add(v)
            remaining.discard(v)
            for w, d2 in G.adj[v].items():
                if w in visited:
                    continue
                heappush(frontier, (d2.get(weight, 1) * sign, next(tiebreak), v, w, d2))
-
-
# Dispatch table used by minimum_spanning_edges/maximum_spanning_edges to
# map an algorithm name to its edge generator.  Both the ASCII and the
# accented spelling of Borůvka are accepted.
ALGORITHMS = {
    "boruvka": boruvka_mst_edges,
    "borůvka": boruvka_mst_edges,
    "kruskal": kruskal_mst_edges,
    "prim": prim_mst_edges,
}
-
-
-
@not_implemented_for("multigraph")
@only_implemented_for_UnDirected_graph
def minimum_spanning_edges(
    G, algorithm="kruskal", weight="weight", data=True, ignore_nan=False
):
    """Generate edges in a minimum spanning forest of an undirected
    weighted graph.

    A minimum spanning tree is a subgraph of the graph (a tree) with the
    minimum sum of edge weights.  A spanning forest is a union of the
    spanning trees of each connected component of the graph.

    Parameters
    ----------
    G : undirected Graph
        An undirected graph. If `G` is connected, then the algorithm finds a
        spanning tree. Otherwise, a spanning forest is found.

    algorithm : string
        The algorithm to use when finding a minimum spanning tree. Valid
        choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'.

    weight : string
        Edge data key to use for weight (default 'weight').

    data : bool, optional
        If True yield the edge data along with the edge.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Returns
    -------
    edges : iterator
        An iterator over edges in a minimum spanning tree of `G`.
        Edges connecting nodes `u` and `v` are represented as tuples
        `(u, v, d)` or `(u, v)` depending on `data`.

    Examples
    --------
    >>> from easygraph.functions.basic import mst
    >>> G.add_edge(0, 3, weight=2)
    >>> edgelist = list(mst.minimum_spanning_edges(G, algorithm="kruskal", data=False))
    >>> sorted(sorted(e) for e in edgelist)
    [[0, 1], [1, 2], [2, 3]]

    Notes
    -----
    For Borůvka's algorithm, each edge must have a weight attribute, and
    each edge weight must be distinct.

    For the other algorithms, if the graph edges do not have a weight
    attribute a default weight of 1 will be used.

    Modified code from David Eppstein, April 2006
    http://www.ics.uci.edu/~eppstein/PADS/

    """
    try:
        mst_algorithm = ALGORITHMS[algorithm]
    except KeyError as err:
        raise ValueError(
            f"{algorithm} is not a valid choice for an algorithm."
        ) from err

    return mst_algorithm(
        G, minimum=True, weight=weight, data=data, ignore_nan=ignore_nan
    )
-
-
-
@not_implemented_for("multigraph")
@only_implemented_for_UnDirected_graph
def maximum_spanning_edges(
    G, algorithm="kruskal", weight="weight", data=True, ignore_nan=False
):
    """Generate edges in a maximum spanning forest of an undirected
    weighted graph.

    A maximum spanning tree is a subgraph of the graph (a tree) with the
    maximum possible sum of edge weights.  A spanning forest is a union of
    the spanning trees of each connected component of the graph.

    Parameters
    ----------
    G : undirected Graph
        An undirected graph. If `G` is connected, then the algorithm finds a
        spanning tree. Otherwise, a spanning forest is found.

    algorithm : string
        The algorithm to use when finding a maximum spanning tree. Valid
        choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'.

    weight : string
        Edge data key to use for weight (default 'weight').

    data : bool, optional
        If True yield the edge data along with the edge.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Returns
    -------
    edges : iterator
        An iterator over edges in a maximum spanning tree of `G`.
        Edges connecting nodes `u` and `v` are represented as tuples
        `(u, v, d)` or `(u, v)` depending on `data`.

    Examples
    --------
    >>> from easygraph.functions.path import mst
    >>> G.add_edge(0, 3, weight=2)
    >>> edgelist = list(mst.maximum_spanning_edges(G, algorithm="kruskal", data=False))
    >>> sorted(sorted(e) for e in edgelist)
    [[0, 1], [0, 3], [1, 2]]

    Notes
    -----
    For Borůvka's algorithm, each edge must have a weight attribute, and
    each edge weight must be distinct.

    For the other algorithms, if the graph edges do not have a weight
    attribute a default weight of 1 will be used.

    Modified code from David Eppstein, April 2006
    http://www.ics.uci.edu/~eppstein/PADS/
    """
    try:
        mst_algorithm = ALGORITHMS[algorithm]
    except KeyError as err:
        raise ValueError(
            f"{algorithm} is not a valid choice for an algorithm."
        ) from err

    return mst_algorithm(
        G, minimum=False, weight=weight, data=data, ignore_nan=ignore_nan
    )
-
-
-
@not_implemented_for("multigraph")
def minimum_spanning_tree(G, weight="weight", algorithm="kruskal", ignore_nan=False):
    """Returns a minimum spanning tree or forest on an undirected graph `G`.

    Parameters
    ----------
    G : undirected graph
        An undirected graph. If `G` is connected, then the algorithm finds a
        spanning tree. Otherwise, a spanning forest is found.

    weight : str
        Data key to use for edge weights.

    algorithm : string
        The algorithm to use when finding a minimum spanning tree. Valid
        choices are 'kruskal', 'prim', or 'boruvka'. The default is
        'kruskal'.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Returns
    -------
    G : EasyGraph Graph
        A minimum spanning tree or forest.

    Examples
    --------
    >>> G.add_edge(0, 3, weight=2)
    >>> T = eg.minimum_spanning_tree(G)
    >>> sorted(T.edges(data=True))
    [(0, 1, {}), (1, 2, {}), (2, 3, {})]

    Notes
    -----
    For Borůvka's algorithm, each edge must have a weight attribute, and
    each edge weight must be distinct.

    For the other algorithms, if the graph edges do not have a weight
    attribute a default weight of 1 will be used.

    Isolated nodes with self-loops are in the tree as edgeless isolated nodes.

    """
    selected_edges = list(
        minimum_spanning_edges(G, algorithm, weight, data=True, ignore_nan=ignore_nan)
    )
    # Build the result in the same graph class as G so subclass behavior
    # carries over; every node is kept even if it gained no edge.
    tree = G.__class__()
    for node in G.nodes:
        tree.add_node(node)
    for u, v, attrs in selected_edges:
        tree.add_edge(u, v, **attrs)
    return tree
-
-
-
@not_implemented_for("multigraph")
def maximum_spanning_tree(G, weight="weight", algorithm="kruskal", ignore_nan=False):
    """Returns a maximum spanning tree or forest on an undirected graph `G`.

    Parameters
    ----------
    G : undirected graph
        An undirected graph. If `G` is connected, then the algorithm finds a
        spanning tree. Otherwise, a spanning forest is found.

    weight : str
        Data key to use for edge weights.

    algorithm : string
        The algorithm to use when finding a maximum spanning tree. Valid
        choices are 'kruskal', 'prim', or 'boruvka'. The default is
        'kruskal'.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Returns
    -------
    G : EasyGraph Graph
        A maximum spanning tree or forest.

    Examples
    --------
    >>> G.add_edge(0, 3, weight=2)
    >>> T = eg.maximum_spanning_tree(G)
    >>> sorted(T.edges(data=True))
    [(0, 1, {}), (0, 3, {'weight': 2}), (1, 2, {})]

    Notes
    -----
    For Borůvka's algorithm, each edge must have a weight attribute, and
    each edge weight must be distinct.

    For the other algorithms, if the graph edges do not have a weight
    attribute a default weight of 1 will be used.

    There may be more than one tree with the same minimum or maximum weight.
    See :mod:`easygraph.tree.recognition` for more detailed definitions.

    Isolated nodes with self-loops are in the tree as edgeless isolated nodes.

    """
    selected_edges = list(
        maximum_spanning_edges(G, algorithm, weight, data=True, ignore_nan=ignore_nan)
    )
    # Build the result in the same graph class as G so subclass behavior
    # carries over; every node is kept even if it gained no edge.
    tree = G.__class__()
    for node in G.nodes:
        tree.add_node(node)
    for u, v, attrs in selected_edges:
        tree.add_edge(u, v, **attrs)
    return tree
-
-
def edge_boundary(G, nbunch1, nbunch2=None, data=False, default=None):
    """Returns the edge boundary of `nbunch1`.

    The *edge boundary* of a set *S* with respect to a set *T* is the
    set of edges (*u*, *v*) such that *u* is in *S* and *v* is in *T*.
    If *T* is not specified, it is assumed to be the set of all nodes
    not in *S*.

    Parameters
    ----------
    G : EasyGraph graph

    nbunch1 : iterable
        Iterable of nodes in the graph representing the set of nodes
        whose edge boundary will be returned. (This is the set *S* from
        the definition above.)

    nbunch2 : iterable
        Iterable of nodes representing the target (or "exterior") set of
        nodes. (This is the set *T* from the definition above.) If not
        specified, this is assumed to be the set of all nodes in `G`
        not in `nbunch1`.

    data : bool or object
        This parameter has the same meaning as in :meth:`MultiGraph.edges`.

    default : object
        This parameter has the same meaning as in :meth:`MultiGraph.edges`.

    Returns
    -------
    iterator
        An iterator over the edges in the boundary of `nbunch1` with
        respect to `nbunch2`.

    Notes
    -----
    Any element of `nbunch` that is not in the graph `G` will be ignored.

    `nbunch1` and `nbunch2` are usually meant to be disjoint, but in the
    interest of speed and generality, that is not required here.

    """
    interior = {node for node in G if node in nbunch1}
    # Edges incident to `interior`.  `Graph.edges()` gives no guarantee on
    # edge orientation, so either (u, v) or (v, u) may appear below and
    # both orders must be handled.
    candidate_edges = G.edges(interior, data=data, default=default)
    if nbunch2 is None:
        # The exterior defaults to the complement of `interior`; membership
        # is tested with XOR (exactly one endpoint inside) rather than
        # materializing the complement set.
        return (
            e for e in candidate_edges if (e[0] in interior) ^ (e[1] in interior)
        )
    exterior = set(nbunch2)
    return (
        e
        for e in candidate_edges
        if (e[0] in interior and e[1] in exterior)
        or (e[1] in interior and e[0] in exterior)
    )
-
-
-"""
-Union-find data structure.
-"""
-
-
class UnionFind:
    """Union-find data structure.

    Each unionFind instance X maintains a family of disjoint sets of
    hashable objects, supporting the following two methods:

    - X[item] returns a name for the set containing the given item.
      Each set is named by an arbitrarily-chosen one of its members; as
      long as the set remains unchanged it will keep the same name. If
      the item is not yet part of a set in X, a new singleton set is
      created for it.

    - X.union(item1, item2, ...) merges the sets containing each item
      into a single larger set. If any item is not yet part of a set
      in X, it is added to X as one of the members of the merged set.

    Union-find data structure. Based on Josiah Carlson's code,
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/215912
    with significant additional changes by D. Eppstein.
    http://www.ics.uci.edu/~eppstein/PADS/UnionFind.py

    """

    def __init__(self, elements=None):
        """Create a new empty union-find structure.

        If *elements* is an iterable, this structure will be initialized
        with the discrete partition on the given set of elements.

        """
        if elements is None:
            elements = ()
        self.parents = {}  # element -> parent; a root maps to itself
        self.weights = {}  # root -> number of elements in its tree
        for x in elements:
            self.weights[x] = 1
            self.parents[x] = x

    def __getitem__(self, item):
        """Find and return the name of the set containing the object."""

        # check for previously unknown object
        if item not in self.parents:
            self.parents[item] = item
            self.weights[item] = 1
            return item

        # find the path of objects leading to the root
        path = [item]
        root = self.parents[item]
        while root != path[-1]:
            path.append(root)
            root = self.parents[root]

        # compress the path and return
        for ancestor in path:
            self.parents[ancestor] = root
        return root

    def __iter__(self):
        """Iterate through all items ever found or unioned by this structure."""
        return iter(self.parents)

    def to_sets(self):
        """Iterates over the sets stored in this structure.

        For example::

            >>> partition = UnionFind("xyz")
            >>> sorted(map(sorted, partition.to_sets()))
            [['x'], ['y'], ['z']]
            >>> partition.union("x", "y")
            >>> sorted(map(sorted, partition.to_sets()))
            [['x', 'y'], ['z']]

        """

        def groups(parents: dict):
            # Invert the parent map: root -> set of members.
            sets = {}
            for member, root in parents.items():
                if root not in sets:
                    sets[root] = set()
                sets[root].add(member)
            return sets

        # Fully compress every path first so each entry points directly at
        # its root; evaluated for side effect only.
        for x in self.parents.keys():
            _ = self[x]

        yield from groups(self.parents).values()

    def union(self, *objects):
        """Find the sets containing the objects and merge them all."""
        # Merge into the heaviest root (union by size).  Sort descending so
        # the first root is the heaviest; the previous ascending sort
        # inverted the heuristic and merged everything into the *lightest*
        # root, contradicting this comment (the resulting partition was
        # still correct, only the tree depth suffered).
        roots = iter(
            sorted(
                {self[x] for x in objects},
                key=lambda r: self.weights[r],
                reverse=True,
            )
        )
        try:
            root = next(roots)
        except StopIteration:
            # union() with no arguments is a no-op.
            return

        for r in roots:
            self.weights[root] += self.weights[r]
            self.parents[r] = root
-
@hybrid("cpp_spfa")
def Spfa(G, node, weight="weight"):
    # No pure-Python SPFA is provided: the @hybrid decorator dispatches to
    # the C++ implementation ("cpp_spfa") for the C-backed graph classes,
    # and any other graph type falls through to this stub and is rejected.
    raise EasyGraphError("Please input GraphC or DiGraphC.")
-
-
-
@not_implemented_for("multigraph")
def Dijkstra(G, node, weight="weight"):
    """Shortest-path lengths from one node to the remaining nodes.

    Thin wrapper around :func:`single_source_dijkstra`.

    Parameters
    ----------
    G : graph
        A weighted graph.
    node : int
        The source node.
    weight : string, optional (default : "weight")
        Edge attribute key holding the edge weight.

    Returns
    -------
    result_dict : dict
        The length of the path from ``node`` to each remaining node.

    Examples
    --------
    >>> Dijkstra(G, node=1, weight="weight")

    """
    return single_source_dijkstra(G, node, weight=weight)
-
-
-
@not_implemented_for("multigraph")
@only_implemented_for_UnDirected_graph
@hybrid("cpp_Floyd")
def Floyd(G, weight="weight"):
    """All-pairs shortest-path lengths via the Floyd–Warshall algorithm.

    Parameters
    ----------
    G : graph
        A weighted graph.
    weight : string, optional (default : "weight")
        Edge attribute key holding the edge weight (missing attributes
        count as 1).

    Returns
    -------
    result_dict : dict
        ``result_dict[u][v]`` is the length of the shortest path from
        ``u`` to ``v`` (``inf`` if unreachable, 0 on the diagonal).

    Examples
    --------
    >>> Floyd(G, weight="weight")

    """
    adj = G.adj.copy()
    # Distance matrix seeded from direct edges: 0 on the diagonal, the
    # edge weight for neighbors, +inf for everything else.
    dist = {u: {} for u in G}
    for u in G:
        direct = adj[u].keys()
        for v in G:
            if v in direct:
                dist[u][v] = adj[u][v].get(weight, 1)
            else:
                dist[u][v] = float("inf")
            if u == v:
                dist[u][u] = 0
    # Relax every pair through every intermediate node k.
    for k in G:
        for u in G:
            for v in G:
                through_k = dist[u][k] + dist[k][v]
                if dist[u][v] > through_k:
                    dist[u][v] = through_k
    return dist
-
-
-
@not_implemented_for("multigraph")
@only_implemented_for_UnDirected_graph
@hybrid("cpp_Prim")
def Prim(G, weight="weight"):
    """Minimum spanning tree edges via a naive Prim scan.

    Parameters
    ----------
    G : graph
        A weighted graph.
    weight : string, optional (default : "weight")
        Edge attribute key holding the edge weight (missing attributes
        count as 1).

    Returns
    -------
    result_dict : dict
        ``result_dict[u][v]`` maps each chosen tree edge to its weight.

    Examples
    --------
    >>> Prim(G, weight="weight")

    """
    adj = G.adj.copy()
    tree_edges = {node: {} for node in G}
    # Seed the tree with the first node; everything else is a candidate.
    in_tree = []
    outside = []
    for node in G:
        if not in_tree:
            in_tree.append(node)
        else:
            outside.append(node)
    while outside:
        best_u = None
        best_v = None
        best_wt = float("inf")
        # Full scan for the lightest edge crossing the (tree, outside) cut.
        for u in in_tree:
            for v in outside:
                if u in G and v in G[u] and adj[u][v].get(weight, 1) < best_wt:
                    best_u = u
                    best_v = v
                    best_wt = adj[u][v].get(weight, 1)
        if best_u is not None and best_v is not None:
            tree_edges[best_u][best_v] = best_wt
            in_tree.append(best_v)
            outside.remove(best_v)
        else:
            # No crossing edge: the remaining nodes are disconnected.
            break
    return tree_edges
-
-
-
@not_implemented_for("multigraph")
@only_implemented_for_UnDirected_graph
@hybrid("cpp_Kruskal")
def Kruskal(G, weight="weight"):
    """Minimum spanning tree edges via a naive Kruskal scan.

    Parameters
    ----------
    G : graph
        A weighted graph.
    weight : string, optional (default : "weight")
        Edge attribute key holding the edge weight (missing attributes
        count as 1).

    Returns
    -------
    result_dict : dict
        ``result_dict[u][v]`` maps each chosen tree edge to its weight.

    Examples
    --------
    >>> Kruskal(G, weight="weight")

    """
    adj = G.adj.copy()
    tree_edges = {node: {} for node in G}
    weighted_edges = [
        [u, v, adj[u][v].get(weight, 1)] for u in G for v in G[u]
    ]
    weighted_edges.sort(key=lambda edge: edge[2])
    # Connectivity is tracked with explicit groups (lists of nodes) rather
    # than a union-find structure; merged groups are emptied in place.
    groups = [[node] for node in G]
    for u, v, wt in weighted_edges:
        for idx in range(len(groups)):
            if u in groups[idx]:
                src_group = idx
            if v in groups[idx]:
                dst_group = idx
        if src_group != dst_group:
            tree_edges[u][v] = wt
            groups[src_group] = groups[src_group] + groups[dst_group]
            groups[dst_group] = []
    return tree_edges
@not_implemented_for("multigraph")
@only_implemented_for_UnDirected_graph
def common_greedy(G, k, c=1.0, weight="weight"):
    """Common greedy method for structural hole spanners detection.

    Returns top k nodes as structural hole spanners,
    Algorithm 1 of [1]_

    Parameters
    ----------
    G : easygraph.Graph
        An undirected graph.

    k : int
        top - k structural hole spanners

    c : float, optional (default : 1.0)
        To define zeta: zeta = c * (n*n*n), and zeta is the large
        value assigned as the shortest distance of two unreachable
        vertices.
        Default is 1.

    weight : String or None, optional (default : 'weight')
        Key for edge weight. None if not concerning about edge weight.

    Returns
    -------
    common_greedy : list
        The list of each top-k structural hole spanners.

    See Also
    --------
    AP_Greedy

    Examples
    --------
    >>> common_greedy(G, k=3, c=1.0, weight='weight')

    References
    ----------
    .. [1] https://dl.acm.org/profile/81484650642

    """
    spanners = []
    working_graph = G.copy()
    num_nodes = len(G)
    for round_idx in range(k):
        ranked_nodes = sort_nodes_by_degree(working_graph, weight)
        best_score = 0

        for idx in range(num_nodes - round_idx):
            trial_graph = working_graph.copy()
            trial_graph.remove_node(ranked_nodes[idx])
            # Cheap upper bound first (procedure1); only pay for the exact
            # score (procedure2) when the bound does not rule the node out.
            if procedure1(trial_graph, c) >= best_score:
                exact_score = procedure2(trial_graph, c)
                if exact_score >= best_score:
                    best_node = ranked_nodes[idx]
                    best_score = exact_score
            del trial_graph

        spanners.append(best_node)
        working_graph.remove_node(best_node)

    del working_graph
    return spanners
@not_implemented_for("multigraph")
@only_implemented_for_UnDirected_graph
def AP_Greedy(G, k, c=1.0, weight="weight"):
    """AP greedy method for structural hole spanners detection.

    Returns top k nodes as structural hole spanners,
    Algorithm 2 of [1]_

    Parameters
    ----------
    G : easygraph.Graph
        An undirected graph.

    k : int
        top - k structural hole spanners

    c : float, optional (default : 1.0)
        To define zeta: zeta = c * (n*n*n), and zeta is the large
        value assigned as the shortest distance of two unreachable
        vertices.
        Default is 1.

    weight : String or None, optional (default : 'weight')
        Key for edge weight. None if not concerning about edge weight.

    Returns
    -------
    AP_greedy : list
        The list of each top-k structural hole spanners.

    Examples
    --------
    Returns the top k nodes as structural hole spanners, using **AP_greedy**.

    >>> AP_greedy(G,
    ...           k = 3, # To find top three structural holes spanners.
    ...           c = 1.0, # To define zeta: zeta = c * (n*n*n), and zeta is the large value assigned as the shortest distance of two unreachable vertices.
    ...           weight = 'weight')

    References
    ----------
    .. [1] https://dl.acm.org/profile/81484650642
    """
    v_sns = []
    G_i = G.copy()
    N = len(G)
    for i in range(k):
        # Lower bounds for articulation points, upper bounds for the rest;
        # if the best articulation point provably beats every non-AP node,
        # it can be selected without any exact evaluation.
        v_ap, lower_bound = _get_lower_bound_of_ap_nodes(G_i, c)
        upper_bound = _get_upper_bound_of_non_ap_nodes(G_i, v_ap, c)
        lower_bound = sorted(lower_bound.items(), key=lambda x: x[1], reverse=True)

        # print(upper_bound)
        # print(lower_bound)
        # NOTE(review): max(upper_bound) raises ValueError when upper_bound
        # is empty (i.e. every remaining node is an articulation point) —
        # TODO confirm whether that case can occur here.
        if len(lower_bound) != 0 and lower_bound[0][1] > max(upper_bound):
            v_i = lower_bound[0][0]
        else:  # If articulation points not chosen, use common_greedy instead.
            sorted_nodes = sort_nodes_by_degree(G_i, weight)
            C_max = 0

            for j in range(N - i):
                # Trial-remove each remaining node and score the result.
                G_i_j = G_i.copy()
                G_i_j.remove_node(sorted_nodes[j])
                # Cheap upper bound first; only compute the exact score
                # (procedure2) when the bound does not rule the node out.
                upper_bound = procedure1(G_i_j, c)
                if upper_bound < C_max:
                    pass
                else:
                    sum_all_shortest_paths = procedure2(G_i_j, c)
                    if sum_all_shortest_paths >= C_max:
                        v_i = sorted_nodes[j]
                        C_max = sum_all_shortest_paths
                    else:
                        pass
                del G_i_j

        v_sns.append(v_i)
        G_i.remove_node(v_i)

    del G_i
    return v_sns
-
-
def _get_lower_bound_of_ap_nodes(G, c=1.0):
    """
    Returns the articulation points and lower bound for each of them.
    Procedure 3 of https://dl.acm.org/profile/81484650642

    Parameters
    ----------
    G : graph
        An undirected graph.

    c : float
        To define zeta: zeta = c * (n*n*n), and zeta is the large
        value assigned as the shortest distance of two unreachable
        vertices.
        Default is 1.
    """
    v_ap = []  # articulation points found, in discovery order
    lower_bound = dict()  # articulation point -> lower-bound score

    N_G = len(G)
    zeta = c * math.pow(N_G, 3)
    # NOTE(review): `components` is iterated again inside the loop below
    # (in the first sum), so this assumes connected_components returns a
    # re-iterable collection (e.g. a list of sets), not a one-shot
    # generator — TODO confirm.
    components = connected_components(G)
    for component in components:
        component_subgraph = G.nodes_subgraph(from_nodes=list(component))
        articulation_points = list(generator_articulation_points(component_subgraph))
        N_component = len(component_subgraph)
        for articulation in articulation_points:
            # Score the graph with this articulation point removed.
            component_subgraph_after_remove = component_subgraph.copy()
            component_subgraph_after_remove.remove_node(articulation)

            # Cross-component unreachable pairs of the original graph ...
            lower_bound_value = 0
            lower_bound_value += sum(
                (len(temp) * (N_G - len(temp))) for temp in components
            )
            # ... plus the pairs newly disconnected inside this component
            # by removing the articulation point ...
            lower_bound_value += sum(
                (len(temp) * (N_component - 1 - len(temp)))
                for temp in connected_components(component_subgraph_after_remove)
            )
            # ... with a correction term, all scaled by zeta.
            lower_bound_value += 2 * N_component - 2 * N_G
            lower_bound_value *= zeta

            v_ap.append(articulation)
            lower_bound[articulation] = lower_bound_value

            del component_subgraph_after_remove

        del component_subgraph

    return v_ap, lower_bound
-
-
def _get_upper_bound_of_non_ap_nodes(G, ap: list, c=1.0):
    """
    Returns the upper bound value for each non-articulation points.
    Eq.(14) of https://dl.acm.org/profile/81484650642

    Parameters
    ----------
    G : graph
        An undirected graph.

    ap : list
        Articulation points of G.

    c : float
        To define zeta: zeta = c * (n*n*n), and zeta is the large
        value assigned as the shortest distance of two unreachable
        vertices.
        Default is 1.
    """
    upper_bound = []  # one bound per non-articulation node, order not keyed

    N_G = len(G)
    zeta = c * math.pow(N_G, 3)
    # NOTE(review): `components` is re-iterated inside the loop below and
    # each component is used as a set (set difference with `ap`); assumes
    # connected_components yields a re-iterable collection of sets — TODO
    # confirm.
    components = connected_components(G)
    for component in components:
        non_articulation_points = component - set(ap)
        for node in non_articulation_points:
            # Cross-component unreachable pairs plus a per-component
            # correction, scaled by zeta; loop-invariant per component but
            # recomputed per node as in the reference procedure.
            upper_bound_value = 0
            upper_bound_value += sum(
                (len(temp) * (N_G - len(temp))) for temp in components
            )
            upper_bound_value += 2 * len(component) + 1 - 2 * N_G
            upper_bound_value *= zeta

            upper_bound.append(upper_bound_value)

    return upper_bound
-
@not_implemented_for("multigraph")
def get_structural_holes_HIS(G, C: List[frozenset], epsilon=1e-4, weight="weight"):
    """Structural hole spanners detection via HIS method.

    Both **HIS** and **MaxD** are methods in [1]_.
    The authors developed these two methods to find the structural holes spanners,
    based on theory of information diffusion.

    Returns the value of `S`, `I`, `H` ,defined in **HIS** of [1], of each node in the graph.
    Note that `H` quantifies the possibility that a node is a structural hole spanner.
    To use `HIS` method, you should provide the community detection result as parameter.

    Parameters
    ----------
    C : list of frozenset
        Each frozenset denotes a community of nodes.

    epsilon : float
        The threshold value.

    weight : string, optional (default : 'weight')
        The key for edge weight.

    Returns
    -------
    S : list of tuple
        The `S` value in [1]_.

    I : float
        The `I` value in [1]_.

    H : float
        The `H` value in [1]_.

    See Also
    --------
    MaxD

    Examples
    --------
    >>> get_structural_holes_HIS(G,
    ...                          C = [frozenset([1,2,3]), frozenset([4,5,6])], # Two communities
    ...                          epsilon = 0.01,
    ...                          weight = 'weight'
    ...                          )

    References
    ----------
    .. [1] https://www.aminer.cn/structural-hole

    """
    # S: every subset of community indices with at least two communities.
    S = []
    for subset_size in range(2, len(C) + 1):
        S.extend(list(combinations(range(len(C)), subset_size)))
    # I: dict[node][cmnt_index], H: dict[node][subset_index]
    I, H = initialize(G, C, S, weight=weight)

    alphas = [0.3] * len(C)  # one alpha per community
    betas = [0.5 - math.pow(0.5, len(subset)) for subset in S]  # per subset

    # Iterate the HIS update until the I scores stop changing by more
    # than epsilon.
    while True:
        P = update_P(G, C, alphas, betas, S, I, H)  # dict[node][cmnt_index]
        I_new, H_new = update_I_H(G, C, S, P, I)
        if is_convergence(G, C, I, I_new, epsilon):
            break
        I, H = I_new, H_new
    return S, I, H
# --- Source code for easygraph.functions.structural_holes.evaluation ---
-importmath
-
-fromeasygraph.utilsimport*
-
-
# Public API: Burt's structural-hole metrics.
__all__ = ["effective_size", "efficiency", "constraint", "hierarchy"]
-
-
def mutual_weight(G, u, v, weight=None):
    """Sum of the weights of the edges u->v and v->u.

    A missing edge contributes 0; an existing edge without the requested
    weight attribute contributes 1.
    """
    forward = 0
    backward = 0
    try:
        forward = G[u][v].get(weight, 1)
    except KeyError:
        pass
    try:
        backward = G[v][u].get(weight, 1)
    except KeyError:
        pass
    return forward + backward
-
-
# Module-level memo caches for normalized_mutual_weight, keyed by (u, v):
# one for norm=sum, one for norm=max.  effective_size clears them before
# each computation so results from a previous graph are not reused.
sum_nmw_rec = {}
max_nmw_rec = {}
-
-
def normalized_mutual_weight(G, u, v, norm=sum, weight=None):
    """Mutual weight of (u, v) normalized over u's neighborhood.

    Computes ``mutual_weight(G, u, v) / norm(mutual_weight(G, u, w) for
    w in neighbors(u))``, returning 0 when the normalizer is 0.

    Parameters
    ----------
    G : graph
    u, v : nodes
    norm : callable
        Must be the builtin ``sum`` or ``max``; results are memoized per
        ``(u, v)`` in the corresponding module-level cache.  As before,
        any other value returns None.
    weight : string or None
        Edge attribute key for the weight (None treats G as unweighted).

    Notes
    -----
    The two branches of the original implementation were identical except
    for the cache dict; they are merged into a single code path here.
    NOTE(review): the caches do not key on ``weight``, so callers must
    clear them when switching weight keys (effective_size does) — confirm
    for any new caller.
    """
    if norm == sum:
        cache = sum_nmw_rec
    elif norm == max:
        cache = max_nmw_rec
    else:
        return None
    try:
        return cache[(u, v)]
    except KeyError:
        scale = norm(mutual_weight(G, u, w, weight) for w in set(G.all_neighbors(u)))
        nmw = 0 if scale == 0 else mutual_weight(G, u, v, weight) / scale
        cache[(u, v)] = nmw
        return nmw
-
-
def effective_size_parallel(nodes, G, weight):
    """Worker for effective_size: general (weighted/directed) formula.

    Computes the redundancy-based effective size for each node in the
    batch and returns ``[node, value]`` pairs; isolated nodes get NaN.
    """
    results = []
    for node in nodes:
        neighborhood = set(G.all_neighbors(node))
        if not neighborhood:
            # Effective size is not defined for isolated nodes.
            results.append([node, float("nan")])
            continue
        total = sum(redundancy(G, node, nbr, weight) for nbr in neighborhood)
        results.append([node, total])
    return results
-
-
def effective_size_borgatti_parallel(nodes, G, weight):
    """Worker for effective_size: Borgatti's simplified formula.

    Valid for unweighted, undirected graphs; returns ``[node, value]``
    pairs, NaN for isolated nodes.
    """
    results = []
    for node in nodes:
        # Effective size is not defined for isolated nodes
        if len(G[node]) == 0:
            results.append([node, float("nan")])
            continue
        ego_net = G.ego_subgraph(node)
        ego_net.remove_node(node)
        value = len(ego_net) - (2 * ego_net.size()) / len(ego_net)
        results.append([node, value])
    return results
-
-
def redundancy(G, u, v, weight=None):
    """Burt's non-redundancy term for the tie (u, v): 1 minus the summed
    product of u's normalized mutual weights and v's max-normalized
    mutual weights over u's neighborhood."""
    overlap = sum(
        normalized_mutual_weight(G, u, w, weight=weight)
        * normalized_mutual_weight(G, v, w, norm=max, weight=weight)
        for w in set(G.all_neighbors(u))
    )
    return 1 - overlap
-
-
@not_implemented_for("multigraph")
@hybrid("cpp_effective_size")
def effective_size(G, nodes=None, weight=None, n_workers=None):
    """Burt's metric - Effective Size.
    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph
    nodes : list of nodes or None, optional (default : None)
        The nodes you want to calculate. If *None*, all nodes in `G` will be calculated.
    weight : string or None, optional (default : None)
        The key for edge weight. If *None*, `G` will be regarded as unweighted graph.
    n_workers : int or None, optional (default : None)
        If not None, compute in parallel with a multiprocessing Pool of
        this many workers.
    Returns
    -------
    effective_size : dict
        The Effective Size of node in `nodes`.
    Examples
    --------
    >>> effective_size(G,
    ...                nodes=[1,2,3], # Compute the Effective Size of some nodes. The default is None for all nodes in G.
    ...                weight='weight' # The weight key of the graph. The default is None for unweighted graph.
    ...                )
    References
    ----------
    .. [1] Burt R S. Structural holes: The social structure of competition[M].
       Harvard university press, 2009.
    """
    # Invalidate the module-level normalized-mutual-weight caches: they
    # are keyed only by (u, v), so entries from a previous graph or
    # weight key would be stale.
    sum_nmw_rec.clear()
    max_nmw_rec.clear()
    effective_size = {}
    if nodes is None:
        nodes = G
    # Use Borgatti's simplified formula for unweighted and undirected graphs
    if not G.is_directed() and weight is None:
        if n_workers is not None:
            import random

            from functools import partial
            from multiprocessing import Pool

            local_function = partial(
                effective_size_borgatti_parallel, G=G, weight=weight
            )
            nodes = list(nodes)
            # Shuffle so each batch carries a similar amount of work.
            random.shuffle(nodes)
            # Cap the batch size at 50000 nodes; otherwise split evenly
            # across the workers.
            if len(nodes) > n_workers * 50000:
                nodes = split_len(nodes, step=50000)
            else:
                nodes = split(nodes, n_workers)
            with Pool(n_workers) as p:
                ret = p.imap(local_function, nodes)
                # Flatten the per-batch [node, value] pairs.
                res = [x for i in ret for x in i]
            effective_size = dict(res)
        else:
            for v in nodes:
                # Effective size is not defined for isolated nodes
                if len(G[v]) == 0:
                    effective_size[v] = float("nan")
                    continue
                E = G.ego_subgraph(v)
                E.remove_node(v)
                effective_size[v] = len(E) - (2 * E.size()) / len(E)
    else:
        # General formula via redundancy for weighted and/or directed graphs.
        if n_workers is not None:
            import random

            from functools import partial
            from multiprocessing import Pool

            local_function = partial(effective_size_parallel, G=G, weight=weight)
            nodes = list(nodes)
            random.shuffle(nodes)
            # Smaller batch cap here (30000) than in the Borgatti branch.
            if len(nodes) > n_workers * 30000:
                nodes = split_len(nodes, step=30000)
            else:
                nodes = split(nodes, n_workers)
            with Pool(n_workers) as p:
                ret = p.imap(local_function, nodes)
                res = [x for i in ret for x in i]
            effective_size = dict(res)
        else:
            for v in nodes:
                # Effective size is not defined for isolated nodes
                if len(G[v]) == 0:
                    effective_size[v] = float("nan")
                    continue
                effective_size[v] = sum(
                    redundancy(G, v, u, weight) for u in set(G.all_neighbors(v))
                )
    return effective_size
-
-
-
@not_implemented_for("multigraph")
def efficiency(G, nodes=None, weight=None):
    """Burt's metric - Efficiency.

    Efficiency is a node's effective size divided by its (weighted) degree.
    Isolated nodes have degree 0 (and ``nan`` effective size), so they are
    reported as ``nan`` instead of raising ``ZeroDivisionError`` as before.

    Parameters
    ----------
    G : easygraph.Graph
    nodes : list of nodes or None, optional (default : None)
        The nodes you want to calculate. If *None*, all nodes in `G` will be calculated.
    weight : string or None, optional (default : None)
        The key for edge weight. If *None*, `G` will be regarded as unweighted graph.

    Returns
    -------
    efficiency : dict
        The Efficiency of node in `nodes`.

    Examples
    --------
    >>> efficiency(G,
    ...            nodes=[1,2,3], # Compute the Efficiency of some nodes. The default is None for all nodes in G.
    ...            weight='weight' # The weight key of the graph. The default is None for unweighted graph.
    ...            )

    References
    ----------
    .. [1] Burt R S. Structural holes: The social structure of competition[M].
       Harvard university press, 2009.
    """
    e_size = effective_size(G, nodes=nodes, weight=weight)
    degree = G.degree(weight=weight)
    efficiency = {}
    for node, size in e_size.items():
        d = degree[node]
        # Bug fix: previously `size / degree[node]` raised ZeroDivisionError
        # for isolated nodes; report nan for them instead.
        efficiency[node] = size / d if d else float("nan")
    return efficiency
-
-
def compute_constraint_of_nodes(nodes, G, weight):
    """Worker for the parallel path of ``constraint``.

    Returns ``[node, value]`` pairs where the value is the sum of local
    constraints over the node's neighbors; isolated nodes get ``nan``.
    """
    pairs = []
    for v in nodes:
        nbrs = set(G.all_neighbors(v))
        if not nbrs:
            pairs.append([v, float("nan")])
        else:
            pairs.append([v, sum(local_constraint(G, v, u, weight) for u in nbrs)])
    return pairs
-
-
@not_implemented_for("multigraph")
@hybrid("cpp_constraint")
def constraint(G, nodes=None, weight=None, n_workers=None):
    """Burt's metric - Constraint.

    Parameters
    ----------
    G : easygraph.Graph
    nodes : list of nodes or None, optional (default : None)
        The nodes you want to calculate. If *None*, all nodes in `G` will be calculated.
    weight : string or None, optional (default : None)
        The key for edge weight. If *None*, `G` will be regarded as unweighted graph.
    n_workers : int or None, optional (default : None)
        The number of workers calculating (default: None).
        None if not using only one worker.

    Returns
    -------
    constraint : dict
        The Constraint of node in `nodes`.

    Examples
    --------
    >>> constraint(G,
    ...            nodes=[1,2,3], # Compute the Constraint of some nodes. The default is None for all nodes in G.
    ...            weight='weight', # The weight key of the graph. The default is None for unweighted graph.
    ...            n_workers=4 # Parallel computing on four workers. The default is None for serial computing.
    ...            )

    References
    ----------
    .. [1] Burt R S. Structural holes: The social structure of competition[M].
       Harvard university press, 2009.
    """
    # Reset memoization caches shared across the structural-hole metrics.
    sum_nmw_rec.clear()
    max_nmw_rec.clear()
    local_constraint_rec.clear()
    if nodes is None:
        nodes = G.nodes
    constraint = {}

    def compute_constraint_of_v(v):
        # Constraint of v = sum of local constraints over all neighbors;
        # nan for isolated nodes (constraint is undefined for them).
        neighbors_of_v = set(G.all_neighbors(v))
        if len(neighbors_of_v) == 0:
            constraint_of_v = float("nan")
        else:
            constraint_of_v = sum(
                local_constraint(G, v, n, weight) for n in neighbors_of_v
            )
        return v, constraint_of_v

    if n_workers is not None:
        import random

        from functools import partial
        from multiprocessing import Pool

        local_function = partial(compute_constraint_of_nodes, G=G, weight=weight)
        nodes = list(nodes)
        # Shuffle so each chunk gets a similar mix of cheap/expensive nodes.
        random.shuffle(nodes)
        if len(nodes) > n_workers * 30000:
            nodes = split_len(nodes, step=30000)
        else:
            nodes = split(nodes, n_workers)
        with Pool(n_workers) as p:
            ret = p.imap(local_function, nodes)
            # Flatten the per-chunk [node, value] pairs.
            constraint_results = [x for i in ret for x in i]
    else:
        constraint_results = []
        for v in nodes:
            constraint_results.append(compute_constraint_of_v(v))

    constraint = dict(constraint_results)
    return constraint
-
-
-local_constraint_rec={}
-
-
def local_constraint(G, u, v, weight=None):
    """Burt's local constraint c(u, v) = (p_uv + sum_w p_uw * p_wv) ** 2,
    where p_xy is the normalized mutual weight. Memoized in
    ``local_constraint_rec``.
    """
    try:
        # EAFP cache lookup: hit on repeated (u, v) pairs within one metric run.
        return local_constraint_rec[(u, v)]
    except KeyError:
        nmw = normalized_mutual_weight
        direct = nmw(G, u, v, weight=weight)
        indirect = sum(
            nmw(G, u, w, weight=weight) * nmw(G, w, v, weight=weight)
            for w in set(G.all_neighbors(u))
        )
        result = (direct + indirect) ** 2
        local_constraint_rec[(u, v)] = result
        return result
-
-
def hierarchy_parallel(nodes, G, weight):
    """Worker for the parallel path of ``hierarchy``.

    Returns ``[node, value]`` pairs; nodes whose ego network has at most one
    alter get hierarchy 0.
    """
    ret = []
    for v in nodes:
        E = G.ego_subgraph(v)
        n = len(E) - 1  # number of alters in v's ego network
        C = 0  # total constraint over all neighbors
        c = {}  # per-neighbor local constraint
        neighbors_of_v = set(G.all_neighbors(v))
        for w in neighbors_of_v:
            # Compute once per neighbor (previously called twice per neighbor).
            lc = local_constraint(G, v, w, weight)
            C += lc
            c[w] = lc
        if n > 1:
            ret.append(
                [
                    v,
                    sum(
                        c[w] / C * n * math.log(c[w] / C * n) / (n * math.log(n))
                        for w in neighbors_of_v
                    ),
                ]
            )
        else:
            ret.append([v, 0])

    return ret
-
-
@not_implemented_for("multigraph")
@hybrid("cpp_hierarchy")
def hierarchy(G, nodes=None, weight=None, n_workers=None):
    """Returns the hierarchy of nodes in the graph

    Parameters
    ----------
    G : graph
    nodes : dict, optional (default: None)
    weight : dict, optional (default: None)
    n_workers : int or None, optional (default: None)
        Number of parallel workers; None computes serially.

    Returns
    -------
    hierarchy : dict
        the hierarchy of nodes in the graph

    Examples
    --------
    Returns the hierarchy of nodes in the graph G

    >>> hierarchy(G)

    Reference
    ---------
    https://m.book118.com/html/2019/0318/5320024122002021.shtm
    """
    # Reset memoization caches shared across the structural-hole metrics.
    sum_nmw_rec.clear()
    max_nmw_rec.clear()
    local_constraint_rec.clear()
    if nodes is None:
        nodes = G.nodes
    hierarchy = {}
    if n_workers is not None:
        import random

        from functools import partial
        from multiprocessing import Pool

        local_function = partial(hierarchy_parallel, G=G, weight=weight)
        nodes = list(nodes)
        # Shuffle so each chunk gets a similar mix of cheap/expensive nodes.
        random.shuffle(nodes)
        if len(nodes) > n_workers * 30000:
            nodes = split_len(nodes, step=30000)
        else:
            nodes = split(nodes, n_workers)
        with Pool(n_workers) as p:
            ret = p.imap(local_function, nodes)
            # Flatten the per-chunk [node, value] pairs.
            res = [x for i in ret for x in i]
        hierarchy = dict(res)
    else:
        for v in nodes:
            E = G.ego_subgraph(v)
            n = len(E) - 1  # number of alters in v's ego network
            C = 0  # total constraint over all neighbors
            c = {}  # per-neighbor local constraint
            neighbors_of_v = set(G.all_neighbors(v))
            for w in neighbors_of_v:
                # local_constraint is memoized, so the repeated call is cheap.
                C += local_constraint(G, v, w, weight)
                c[w] = local_constraint(G, v, w, weight)
            if n > 1:
                hierarchy[v] = sum(
                    c[w] / C * n * math.log(c[w] / C * n) / (n * math.log(n))
                    for w in neighbors_of_v
                )
            # Nodes with ego networks of size <= 1 get hierarchy 0.
            if v not in hierarchy:
                hierarchy[v] = 0
    return hierarchy
-
def _penalized_sp_sum(dist_mat, inf_penalty):
    """Sum all pairwise distances in ``dist_mat`` (dict of dicts), substituting
    ``inf_penalty`` for unreachable (infinite) pairs."""
    total = 0
    for row in dist_mat.values():
        for d in row.values():
            total += inf_penalty if math.isinf(d) else d
    return total


@not_implemented_for("multigraph")
def sum_of_shortest_paths(G, S):
    r"""Returns the difference between the sum of lengths of all pairs shortest paths in G and the one in G\S.
    The experiment metrics in [1]_

    Parameters
    ----------
    G: easygraph.Graph or easygraph.DiGraph

    S: list of int
        A list of nodes witch are structural hole spanners.

    Returns
    -------
    differ_between_sum : int
        The difference between the sum of lengths of all pairs shortest paths in G and the one in G\S.
        C(G/S)-C(G)

    Examples
    --------
    >>> G_t=eg.datasets.get_graph_blogcatalog()
    >>> S_t=eg.AP_Greedy(G_t, 10000)
    >>> diff = sum_of_shortest_paths(G_t, S_t)
    >>> print(diff)

    References
    ----------
    .. [1] https://dl.acm.org/profile/81484650642

    """
    # Unreachable pairs are penalized with ceil(n^3 / 3) as in the original
    # implementation; the two sums share one helper (previously duplicated).
    sum_G = _penalized_sp_sum(
        eg.Floyd(G), math.ceil((G.number_of_nodes() ** 3) / 3)
    )
    G_S = G.copy()
    G_S.remove_nodes(S)
    sum_G_S = _penalized_sp_sum(
        eg.Floyd(G_S), math.ceil((G_S.number_of_nodes() ** 3) / 3)
    )
    return sum_G_S - sum_G
-
-
-
@not_implemented_for("multigraph")
def nodes_of_max_cc_without_shs(G, S):
    r"""Returns the number of nodes in the maximum connected component in graph G\S.
    The experiment metrics in [1]_

    Parameters
    ----------
    G: easygraph.Graph or easygraph.DiGraph

    S: list of int
        A list of nodes witch are structural hole spanners.

    Returns
    -------
    G_S_nodes_of_max_CC: int
        The number of nodes in the maximum connected component in graph G\S.

    Examples
    --------
    >>> G_t=eg.datasets.get_graph_blogcatalog()
    >>> S_t=eg.AP_Greedy(G_t, 10000)
    >>> maxx = nodes_of_max_cc_without_shs(G_t, S_t)
    >>> print(maxx)

    References
    ----------
    .. [1] https://dl.acm.org/profile/81484650642

    """
    residual = G.copy()
    residual.remove_nodes(S)
    # Largest component size; 0 when the residual graph has no components.
    return max((len(cc) for cc in eg.connected_components(residual)), default=0)
Source code for easygraph.functions.structural_holes.weakTie
-importeasygraphaseg
-
-fromeasygraph.utilsimport*
-
-
-__all__=[
- "weakTie",
- "weakTieLocal",
-]
-
-
-def_computeTieStrength(G,node_u,node_v):
- F_u=set(G.neighbors(node=node_u))
- F_u.add(node_u)
- F_v=set(G.neighbors(node=node_v))
- F_v.add(node_v)
- uni=len(F_u.union(F_v))
- inter=len(F_u.intersection(F_v))
- S_uv=inter/uni
- G[node_u][node_v]["strength"]=S_uv
-
-
def _computeAllTieStrength(G):
    """Compute and store the ``strength`` attribute for every edge of ``G``."""
    for edge in G.edges:
        u, v = edge[0], edge[1]
        _computeTieStrength(G, u, v)
- # print(G.edges)
-
-
def _strongly_connected_components(G, threshold):
    """Generate nodes in strongly connected components of graph with constraint threshold.

    Parameters
    ----------
    G : easygraph.DiGraph
        A directed graph. Every edge is expected to already carry a
        ``"strength"`` attribute (see ``_computeAllTieStrength``).

    threshold: float
        the edge whose tie strength is smaller than threshold will be ignored.

    Returns
    -------
    comp : generator of sets
        A generator of sets of nodes, one for each strongly connected
        component of G.

    Examples
    --------
    # >>> _strongly_connected_components(G, 0.2)

    Notes
    -----
    Uses Tarjan's algorithm[1]_ with Nuutila's modifications[2]_.
    Nonrecursive version of algorithm.

    References
    ----------
    .. [1] Depth-first search and linear graph algorithms, R. Tarjan
       SIAM Journal of Computing 1(2):146-160, (1972).

    .. [2] On finding the strongly connected components in a directed graph.
       E. Nuutila and E. Soisalon-Soinen
       Information Processing Letters 49(1): 9-14, (1994)..

    """
    preorder = {}
    lowlink = {}
    scc_found = set()
    scc_queue = []
    i = 0  # Preorder counter
    for source in G:
        if source not in scc_found:
            # Explicit DFS stack — nonrecursive Tarjan.
            queue = [source]
            while queue:
                v = queue[-1]
                if v not in preorder:
                    i = i + 1
                    preorder[v] = i
                done = True
                # Only follow edges at or above the tie-strength threshold.
                for w in G[v]:
                    if G[v][w]["strength"] >= threshold:
                        if w not in preorder:
                            queue.append(w)
                            done = False
                            break
                if done:
                    # All successors of v visited: compute v's lowlink.
                    lowlink[v] = preorder[v]
                    for w in G[v]:
                        if G[v][w]["strength"] >= threshold:
                            if w not in scc_found:
                                if preorder[w] > preorder[v]:
                                    lowlink[v] = min([lowlink[v], lowlink[w]])
                                else:
                                    lowlink[v] = min([lowlink[v], preorder[w]])
                    queue.pop()
                    if lowlink[v] == preorder[v]:
                        # v is the root of an SCC; drain deeper nodes from scc_queue.
                        scc = {v}
                        while scc_queue and preorder[scc_queue[-1]] > preorder[v]:
                            k = scc_queue.pop()
                            scc.add(k)
                        scc_found.update(scc)
                        yield scc
                    else:
                        scc_queue.append(v)
-
-
-def_computeCloseness(G,c,u,threshold,length):
- n=0
- strength_sum_u=0
- forvinc:
- ifuinG[v]andv!=u:
- ifG[v][u]["strength"]!=0:
- n+=1
- strength_sum_u+=G[v][u]["strength"]
- closeness_c_u=(strength_sum_u-n*threshold)/length
- returncloseness_c_u
-
-
def _computeScore(G, threshold):
    """Accumulate each node's broker score: the sum of the magnitudes of its
    negative closeness values over all strongly connected components."""
    scores = dict.fromkeys(G.nodes, 0)
    for component in _strongly_connected_components(G, threshold):
        size = len(component)
        for u in G.nodes:
            closeness = _computeCloseness(G, component, u, threshold, size)
            if closeness < 0:
                scores[u] -= closeness
    return scores
-
-
-
@not_implemented_for("multigraph")
def weakTie(G, threshold, k):
    """Return top-k nodes with highest scores which were computed by WeakTie method.

    Parameters
    ----------
    G: easygraph.DiGraph

    k: int
        top - k nodes with highest scores. If ``k`` exceeds the number of
        nodes, all nodes are returned (previously this raised ``IndexError``).

    threshold: float
        tie strength threshold.

    Returns
    -------
    SHS_list : list
        The list of each nodes with highest scores.

    score_dict: dict
        The score of each node, can be used for WeakTie-Local and WeakTie-Bi.

    See Also
    -------
    weakTieLocal

    Examples
    --------
    # >>> SHS_list,score_dict=weakTie(G, 0.2, 3)

    References
    ----------
    .. [1] Mining Brokers in Dynamic Social Networks. Chonggang Song, Wynne Hsu, Mong Li Lee. Proc. of ACM CIKM, 2015.

    """
    _computeAllTieStrength(G)
    score_dict = _computeScore(G, threshold)
    ordered_set = sorted(score_dict.items(), key=lambda x: x[1], reverse=True)
    # Slice instead of indexing so k > len(score_dict) cannot raise IndexError.
    SHS_list = [node for node, _ in ordered_set[:k]]
    print("score dict:", score_dict)
    print("top-k nodes:", SHS_list)
    return SHS_list, score_dict
-
-
@not_implemented_for("multigraph")
def _updateScore(u, G, threshold):
    """Recompute the broker score of node ``u`` over the SCCs of (sub)graph ``G``."""
    score = 0
    for component in _strongly_connected_components(G, threshold):
        closeness = _computeCloseness(G, component, u, threshold, len(component))
        if closeness < 0:
            score -= closeness
    return score
-
-
-def_get2hop(G,node):
- neighbors=[]
- firstlevel={node:1}
- seen={}# level (number of hops) when seen in BFS
- level=0# the current level
- nextlevel=set(firstlevel)# set of nodes to check at next level
- n=len(G.adj)
- whilenextlevelandlevel<=2:
- thislevel=nextlevel# advance to next level
- nextlevel=set()# and start a new set (fringe)
- found=[]
- forvinthislevel:
- ifvnotinseen:
- seen[v]=level# set the level of vertex v
- found.append(v)
- # yield (v, level)
- neighbors.append(v)
- iflen(seen)==n:
- return
- forvinfound:
- nextlevel.update(G.adj[v])
- level+=1
- delseen
- returnneighbors
-
-
def _commonUpdate(G, node_u, node_v, threshold, score_dict):
    """Refresh tie strengths and broker scores around an updated edge (u, v).

    Called by ``weakTieLocal`` after an edge insertion or deletion. Mutates
    both ``G`` (edge ``strength`` attributes) and ``score_dict`` in place.
    """
    # Recompute tie strength on all out-edges and in-edges of node_u.
    for node_w in G.neighbors(node=node_u):
        _computeTieStrength(G, node_u, node_w)
    for node_w in G.predecessors(node=node_u):
        _computeTieStrength(G, node_w, node_u)
    # Build an undirected shadow graph to extract 2-hop neighborhoods.
    G_un = eg.Graph()
    for node in G.nodes:
        G_un.add_node(node)
    for edge in G.edges:
        if not G_un.has_edge(edge[0], edge[1]):
            G_un.add_edge(edge[0], edge[1])
    u_2hop = _get2hop(G_un, node_u)
    G_u = G.nodes_subgraph(from_nodes=u_2hop)
    v_2hop = _get2hop(G_un, node_v)
    G_v = G.nodes_subgraph(from_nodes=v_2hop)
    # Rescore both endpoints on their 2-hop subgraphs.
    score_u = _updateScore(node_u, G_u, threshold)
    score_v = _updateScore(node_v, G_v, threshold)
    score_dict[node_u] = score_u
    score_dict[node_v] = score_v
    all_neigh_u = list(set(G.all_neighbors(node=node_u)))
    # print("all_neigh:", all_neigh_u)
    all_neigh_v = list(set(G.all_neighbors(node=node_v)))
    for node_w in all_neigh_u:
        if node_w in all_neigh_v:
            # Common neighbor of u and v: full rescore on its 2-hop subgraph.
            w_2hop = _get2hop(G_un, node_w)
            G_w = G.nodes_subgraph(from_nodes=w_2hop)
            score_w = _updateScore(node_w, G_w, threshold)
        else:
            # Neighbor of u only: only components containing node_u contribute.
            score_w = 0
            w_2hop = _get2hop(G_un, node_w)
            G_w = G.nodes_subgraph(from_nodes=w_2hop)
            for c in _strongly_connected_components(G_w, threshold):
                if node_u in c:
                    length = len(c)
                    closeness_c_w = _computeCloseness(G, c, node_w, threshold, length)
                    if closeness_c_w < 0:
                        score_w -= closeness_c_w
        # NOTE(review): source indentation was lost in extraction; this store is
        # placed at loop level (both branches), which is the only reading where
        # the common-neighbor score is not discarded — confirm against upstream.
        score_dict[node_w] = score_w
-
-
-
def weakTieLocal(G, edges_plus, edges_delete, threshold, score_dict, k):
    """Find brokers in evolving social networks, utilize the 2-hop neighborhood of an affected node to identify brokers.

    Parameters
    ----------
    G: easygraph.DiGraph

    edges_plus: list of list
        set of edges to be added

    edges_delete: list of list
        set of edges to be removed

    threshold: float
        tie strength threshold.

    score_dict: dict
        The score of each node computed before.

    k: int
        top - k nodes with highest scores. If ``k`` exceeds the number of
        scored nodes, all nodes are returned (previously raised ``IndexError``).

    Returns
    -------
    SHS_list : list
        The list of each nodes with highest scores.

    See Also
    -------
    weakTie

    Examples
    --------
    # >>> SHS_list=weakTieLocal(G, [[2, 7]], [[1,3]], 0.2, score_dict, 3)

    References
    ----------
    .. [1] Mining Brokers in Dynamic Social Networks. Chonggang Song, Wynne Hsu, Mong Li Lee. Proc. of ACM CIKM, 2015.

    """
    # Apply insertions, refreshing strengths and scores around each new edge.
    for edge in edges_plus:
        G.add_edge(edge[0], edge[1])
        _computeTieStrength(G, edge[0], edge[1])
        _commonUpdate(G, edge[0], edge[1], threshold, score_dict)
    # Apply deletions and rescore the affected neighborhoods.
    for edge in edges_delete:
        G.remove_edge(edge[0], edge[1])
        _commonUpdate(G, edge[0], edge[1], threshold, score_dict)
    ordered_set = sorted(score_dict.items(), key=lambda x: x[1], reverse=True)
    # Slice instead of indexing so k > len(score_dict) cannot raise IndexError.
    SHS_list = [node for node, _ in ordered_set[:k]]
    print("updated score:", score_dict)
    print("top-k nodes:", SHS_list)
    return SHS_list
# Torch-dependent submodules are optional: warn instead of failing at import.
try:
    from typing import Dict
    from typing import List
    from typing import Union

    from easygraph._global import AUTHOR_EMAIL

    from .base import BaseEvaluator
    from .classification import VertexClassificationEvaluator
    from .classification import available_classification_metrics
    from .hypergraphs import HypergraphVertexClassificationEvaluator
except ImportError:
    # Bug fix: a bare `except:` previously swallowed every exception
    # (including KeyboardInterrupt and genuine bugs in the submodules);
    # only a missing dependency should be tolerated here.
    print(
        "Warning raise in module:ml_metrics. Please install Pytorch before you use"
        " functions related to nueral network"
    )
-
-
-
def build_evaluator(
    task: str,
    metric_configs: List[Union[str, Dict[str, dict]]],
    validate_index: int = 0,
):
    r"""Return the metric evaluator for the given task.

    Args:
        ``task`` (``str``): The type of the task. The supported types include: ``graph_vertex_classification``, ``hypergraph_vertex_classification``, and ``user_item_recommender``.
        ``metric_configs`` (``List[Union[str, Dict[str, dict]]]``): The list of metric names.
        ``validate_index`` (``int``): The specified metric index used for validation. Defaults to ``0``.
    """
    # Guard clause: only hypergraph vertex classification is wired up so far.
    if task != "hypergraph_vertex_classification":
        raise ValueError(
            f"{task} is not supported yet. Please email '{AUTHOR_EMAIL}' to add it."
        )
    return HypergraphVertexClassificationEvaluator(metric_configs, validate_index)
class BaseEvaluator:
    r"""The base class for task-specified metric evaluators.

    Args:
        ``task`` (``str``): The type of the task. The supported types include: ``classification``, ``retrieval`` and ``recommender``.
        ``metric_configs`` (``List[Union[str, Dict[str, dict]]]``): The metric configurations. The key is the metric name and the value is the metric parameters.
        ``validate_index`` (``int``): The specified metric index used for validation. Defaults to ``0``.
    """

    def __init__(
        self,
        task: str,
        metric_configs: List[Union[str, Dict[str, dict]]],
        validate_index: int = 0,
    ):
        self.validate_index = validate_index
        # Normalize the configs to a list of {"marker": ..., "func": ...} dicts.
        metric_configs = format_metric_configs(task, metric_configs)
        assert validate_index >= 0 and validate_index < len(
            metric_configs
        ), "The specified validate metric index is out of range."
        # Parallel lists: marker_list holds metric names, func_list callables.
        self.marker_list, self.func_list = [], []
        for metric in metric_configs:
            self.marker_list.append(metric["marker"])
            self.func_list.append(metric["func"])
        # init batch data containers
        self.validate_res = []
        self.test_res_dict = defaultdict(list)
        # Last epoch results, returned when no new batch data has been added.
        self.last_validate_res, self.last_test_res = None, {}

    @abc.abstractmethod
    def __repr__(self) -> str:
        r"""Print the Evaluator information."""

    def validate_epoch_res(self):
        r"""For all added batch data, return the result of the evaluation on the specified ``validate_index``-th metric.
        """
        import numpy as np

        # No new batches since the last call: return the cached epoch result.
        if self.validate_res == [] and self.last_validate_res is not None:
            return self.last_validate_res
        assert self.validate_res != [], "No batch data added for validation."
        # Mean over batches of the single validation metric.
        self.last_validate_res = np.vstack(self.validate_res).mean(0).item()
        # clear batch cache
        self.validate_res = []
        return self.last_validate_res

    def test_epoch_res(self):
        r"""For all added batch data, return results of the evaluation on all the ml_metrics in ``metric_configs``.
        """
        import numpy as np

        # No new batches since the last call: return the cached epoch results.
        if self.test_res_dict == {} and self.last_test_res is not None:
            return self.last_test_res
        assert self.test_res_dict != {}, "No batch data added for testing."
        for name, res_list in self.test_res_dict.items():
            if not isinstance(res_list[0], list):
                # Scalar (or array) metric: mean over batches.
                self.last_test_res[name] = (
                    np.vstack(res_list).mean(0).squeeze().tolist()
                )
            else:
                # List-valued metric: mean each sub-series independently.
                self.last_test_res[name] = [
                    np.vstack(sub_res_list).mean(0).squeeze().tolist()
                    for sub_res_list in res_list
                ]
        # clear batch cache
        self.test_res_dict = defaultdict(list)
        return self.last_test_res

    def validate(self, y_true: torch.LongTensor, y_pred: torch.Tensor):
        r"""Return the result of the evaluation on the specified ``validate_index``-th metric.

        Args:
            ``y_true`` (``torch.LongTensor``): The ground truth labels. Size :math:`(N_{samples}, -)`.
            ``y_pred`` (``torch.Tensor``): The predicted labels. Size :math:`(N_{samples}, -)`.
        """
        return self.func_list[self.validate_index](y_true, y_pred)

    def test(self, y_true: torch.LongTensor, y_pred: torch.Tensor):
        r"""Return results of the evaluation on all the ml_metrics in ``metric_configs``.

        Args:
            ``y_true`` (``torch.LongTensor``): The ground truth labels. Size :math:`(N_{samples}, -)`.
            ``y_pred`` (``torch.Tensor``): The predicted labels. Size :math:`(N_{samples}, -)`.
        """
        return {
            name: func(y_true, y_pred)
            for name, func in zip(self.marker_list, self.func_list)
        }
def available_classification_metrics():
    r"""Return available ml_metrics for the classification task.

    The available ml_metrics are: ``accuracy``, ``f1_score``, ``confusion_matrix``.
    """
    metrics = ("accuracy", "f1_score", "confusion_matrix")
    return metrics
-
-
-def_format_inputs(y_true:torch.LongTensor,y_pred:torch.Tensor):
-r"""Format the inputs.
-
- Args:
- ``y_true`` (``torch.LongTensor``): The ground truth labels. Size :math:`(N_{samples}, )`.
- ``y_pred`` (``torch.Tensor``): The predicted labels. Size :math:`(N_{samples}, N_{class})` or :math:`(N_{samples}, )`.
- """
- asserty_true.dim()==1,"y_true must be 1D torch.LongTensor."
- asserty_pred.dim()in(1,2),"y_pred must be 1D or 2D torch.Tensor."
- y_true=y_true.cpu().detach()
- ify_pred.dim()==2:
- y_pred=y_pred.argmax(dim=1)
- y_pred=y_pred.cpu().detach()
- asserty_true.shape==y_pred.shape,"y_true and y_pred must have the same length."
- return(y_true,y_pred)
-
-
-
def accuracy(y_true: torch.LongTensor, y_pred: torch.Tensor):
    r"""Calculate the accuracy score for the classification task.

    .. math::
        \text{Accuracy} = \frac{1}{N} \sum_{i=1}^{N} \mathcal{I}(y_i, \hat{y}_i),

    where :math:`\mathcal{I}(\cdot, \cdot)` is the indicator function, which is 1 if the two inputs are equal, and 0 otherwise.
    :math:`y_i` and :math:`\hat{y}_i` are the ground truth and predicted labels for the i-th sample.

    Args:
        ``y_true`` (``torch.LongTensor``): The ground truth labels. Size :math:`(N_{samples}, )`.
        ``y_pred`` (``torch.Tensor``): The predicted labels. Size :math:`(N_{samples}, N_{class})` or :math:`(N_{samples}, )`.

    Examples:
        >>> import torch
        >>> import easygraph.ml_metrics as dm
        >>> y_true = torch.tensor([3, 2, 4])
        >>> y_pred = torch.tensor([
                [0.2, 0.3, 0.5, 0.4, 0.3],
                [0.8, 0.2, 0.3, 0.5, 0.4],
                [0.2, 0.4, 0.5, 0.2, 0.8],
            ])
        >>> dm.classification.accuracy(y_true, y_pred)
        0.3333333432674408
    """
    labels, predictions = _format_inputs(y_true, y_pred)
    correct = labels == predictions
    return correct.float().mean().item()
-
-
-
def f1_score(y_true: torch.LongTensor, y_pred: torch.Tensor, average: str = "macro"):
    r"""Calculate the F1 score for the classification task.

    Args:
        ``y_true`` (``torch.LongTensor``): The ground truth labels. Size :math:`(N_{samples}, )`.
        ``y_pred`` (``torch.Tensor``): The predicted labels. Size :math:`(N_{samples}, N_{class})` or :math:`(N_{samples}, )`.
        ``average`` (``str``): The average method. Must be one of "macro", "micro", "weighted".

    Examples:
        >>> import torch
        >>> import easygraph.ml_metrics as dm
        >>> y_true = torch.tensor([3, 2, 4, 0])
        >>> y_pred = torch.tensor([
                [0.2, 0.3, 0.5, 0.4, 0.3],
                [0.8, 0.2, 0.3, 0.5, 0.4],
                [0.2, 0.4, 0.5, 0.2, 0.8],
                [0.8, 0.4, 0.5, 0.2, 0.8]
            ])
        >>> dm.classification.f1_score(y_true, y_pred, "macro")
        0.41666666666666663
        >>> dm.classification.f1_score(y_true, y_pred, "micro")
        0.5
        >>> dm.classification.f1_score(y_true, y_pred, "weighted")
        0.41666666666666663
    """
    y_true, y_pred = _format_inputs(y_true, y_pred)
    # ``sm`` is a module-level alias (presumably sklearn.metrics — its import is
    # not visible in this view; confirm against the module header).
    return sm.f1_score(y_true, y_pred, average=average)
class VertexClassificationEvaluator(BaseEvaluator):
    r"""Return the metric evaluator for vertex classification task. The supported ml_metrics includes: ``accuracy``, ``f1_score``, ``confusion_matrix``.

    Args:
        ``metric_configs`` (``List[Union[str, Dict[str, dict]]]``): The metric configurations. The key is the metric name and the value is the metric parameters.
        ``validate_index`` (``int``): The specified metric index used for validation. Defaults to ``0``.
    """

    def __init__(
        self,
        metric_configs: List[Union[str, Dict[str, dict]]],
        validate_index: int = 0,
    ):
        # Fix the task to "classification"; everything else is inherited.
        super().__init__("classification", metric_configs, validate_index)

    def validate(self, y_true: torch.LongTensor, y_pred: torch.Tensor):
        r"""Return the result of the evaluation on the specified ``validate_index``-th metric.

        Args:
            ``y_true`` (``torch.LongTensor``): The ground truth labels. Size :math:`(N_{samples}, )`.
            ``y_pred`` (``torch.Tensor``): The predicted labels. Size :math:`(N_{samples}, N_{class})` or :math:`(N_{samples}, )`.
        """
        return super().validate(y_true, y_pred)

    def test(self, y_true: torch.LongTensor, y_pred: torch.Tensor):
        r"""Return results of the evaluation on all the ml_metrics in ``metric_configs``.

        Args:
            ``y_true`` (``torch.LongTensor``): The ground truth labels. Size :math:`(N_{samples}, )`.
            ``y_pred`` (``torch.Tensor``): The predicted labels. Size :math:`(N_{samples}, N_{class})` or :math:`(N_{samples}, )`.
        """
        return super().test(y_true, y_pred)
class HGNN(nn.Module):
    r"""The HGNN model proposed in `Hypergraph Neural Networks <https://arxiv.org/pdf/1809.09401>`_ paper (AAAI 2019).

    Args:
        ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
        ``hid_channels`` (``int``): :math:`C_{hid}` is the number of hidden channels.
        ``num_classes`` (``int``): The Number of class of the classification task.
        ``use_bn`` (``bool``): If set to ``True``, use batch normalization. Defaults to ``False``.
        ``drop_rate`` (``float``, optional): Dropout ratio. Defaults to 0.5.
    """

    def __init__(
        self,
        in_channels: int,
        hid_channels: int,
        num_classes: int,
        use_bn: bool = False,
        drop_rate: float = 0.5,
    ) -> None:
        super().__init__()
        # Two-layer architecture: input -> hidden (with dropout), then
        # hidden -> classes (is_last disables the layer's post-processing).
        self.layers = nn.ModuleList()
        self.layers.append(
            HGNNConv(in_channels, hid_channels, use_bn=use_bn, drop_rate=drop_rate)
        )
        self.layers.append(
            HGNNConv(hid_channels, num_classes, use_bn=use_bn, is_last=True)
        )
-
class HGNNP(nn.Module):
    r"""The HGNN :sup:`+` model proposed in `HGNN+: General Hypergraph Neural Networks <https://ieeexplore.ieee.org/document/9795251>`_ paper (IEEE T-PAMI 2022).

    Args:
        ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
        ``hid_channels`` (``int``): :math:`C_{hid}` is the number of hidden channels.
        ``num_classes`` (``int``): The Number of class of the classification task.
        ``use_bn`` (``bool``): If set to ``True``, use batch normalization. Defaults to ``False``.
        ``drop_rate`` (``float``, optional): Dropout ratio. Defaults to ``0.5``.
    """

    def __init__(
        self,
        in_channels: int,
        hid_channels: int,
        num_classes: int,
        use_bn: bool = False,
        drop_rate: float = 0.5,
    ) -> None:
        super().__init__()
        # Two-layer architecture: input -> hidden (with dropout), then
        # hidden -> classes (is_last disables the layer's post-processing).
        self.layers = nn.ModuleList()
        self.layers.append(
            HGNNPConv(in_channels, hid_channels, use_bn=use_bn, drop_rate=drop_rate)
        )
        self.layers.append(
            HGNNPConv(hid_channels, num_classes, use_bn=use_bn, is_last=True)
        )
-
-
class HNHN(nn.Module):
    r"""The HNHN model proposed in `HNHN: Hypergraph Networks with Hyperedge Neurons <https://arxiv.org/pdf/2006.12278.pdf>`_ paper (ICML 2020).

    Args:
        ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
        ``hid_channels`` (``int``): :math:`C_{hid}` is the number of hidden channels.
        ``num_classes`` (``int``): The Number of class of the classification task.
        ``use_bn`` (``bool``): If set to ``True``, use batch normalization. Defaults to ``False``.
        ``drop_rate`` (``float``, optional): Dropout ratio. Defaults to ``0.5``.
    """

    def __init__(
        self,
        in_channels: int,
        hid_channels: int,
        num_classes: int,
        use_bn: bool = False,
        drop_rate: float = 0.5,
    ) -> None:
        super().__init__()
        # Two-layer architecture: input -> hidden (with dropout), then
        # hidden -> classes (is_last disables the layer's post-processing).
        self.layers = nn.ModuleList()
        self.layers.append(
            HNHNConv(in_channels, hid_channels, use_bn=use_bn, drop_rate=drop_rate)
        )
        self.layers.append(
            HNHNConv(hid_channels, num_classes, use_bn=use_bn, is_last=True)
        )
-
-
class HWNN(nn.Module):
    r"""The HWNN hypergraph model.

    NOTE(review): the original docstring cited the HGNN paper (AAAI 2019),
    apparently copy-pasted from ``HGNN``; confirm the correct HWNN citation.

    Parameters:
        ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
        ``hid_channels`` (``int``): :math:`C_{hid}` is the number of hidden channels.
        ``num_classes`` (``int``): The Number of class of the classification task.
        ``ncount`` (``int``): The Number of node in the hypergraph.
        ``hyper_snapshot_num`` (``int``): The Number of snapshots splited from hypergraph.
        ``drop_rate`` (``float``, optional): Dropout ratio. Defaults to 0.01.
    """

    def __init__(
        self,
        in_channels: int,
        num_classes: int,
        ncount: int,
        hyper_snapshot_num: int = 1,
        hid_channels: int = 128,
        drop_rate: float = 0.01,
    ) -> None:
        super().__init__()
        self.drop_rate = drop_rate
        # Two wavelet-convolution layers: input -> hidden -> classes.
        self.convolution_1 = HWNNConv(
            in_channels, hid_channels, ncount, K1=3, K2=3, approx=True
        )
        self.convolution_2 = HWNNConv(
            hid_channels, num_classes, ncount, K1=3, K2=3, approx=True
        )
        # Learnable per-snapshot mixing weights, initialized uniform in [0, 0.99).
        self.par = torch.nn.Parameter(torch.Tensor(hyper_snapshot_num))
        torch.nn.init.uniform_(self.par, 0, 0.99)

    def forward(self, X: torch.Tensor, hgs: list) -> torch.Tensor:
        r"""The forward function.

        Parameters:
            ``X`` (``torch.Tensor``): Input vertex feature matrix. Size :math:`(N, C_{in})`.
            ``hgs`` (``list`` of ``Hypergraph``): A list of hypergraph structures which stands for snapshots.
        """
        channel = []
        hyper_snapshot_num = len(hgs)
        # Run the two-layer network on each snapshot independently.
        for snap_index in range(hyper_snapshot_num):
            hg = hgs[snap_index]
            Y = F.relu(self.convolution_1(X, hg))
            Y = F.dropout(Y, self.drop_rate)
            Y = self.convolution_2(Y, hg)
            Y = F.log_softmax(Y, dim=1)
            channel.append(Y)
        # Combine per-snapshot outputs with the learned weights self.par.
        X = torch.zeros_like(channel[0])
        for ind in range(hyper_snapshot_num):
            X = X + self.par[ind] * channel[ind]
        return X
class HyperGCN(nn.Module):
    r"""The HyperGCN model proposed in `HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs <https://papers.nips.cc/paper/2019/file/1efa39bcaec6f3900149160693694536-Paper.pdf>`_ paper (NeurIPS 2019).

    Parameters:
        ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
        ``hid_channels`` (``int``): :math:`C_{hid}` is the number of hidden channels.
        ``num_classes`` (``int``): The Number of class of the classification task.
        ``use_mediator`` (``str``): Whether to use mediator to transform the hyperedges to edges in the graph. Defaults to ``False``.
        ``fast`` (``bool``): If set to ``True``, the transformed graph structure will be computed once from the input hypergraph and vertex features, and cached for future use. Defaults to ``True``.
        ``drop_rate`` (``float``, optional): Dropout ratio. Defaults to 0.5.
    """

    def __init__(
        self,
        in_channels: int,
        hid_channels: int,
        num_classes: int,
        use_mediator: bool = False,
        use_bn: bool = False,
        fast: bool = True,
        drop_rate: float = 0.5,
    ) -> None:
        super().__init__()
        self.fast = fast
        # Cache slot for the transformed graph in fast mode; filled lazily
        # (population happens outside this view).
        self.cached_g = None
        self.with_mediator = use_mediator
        # Two-layer architecture: input -> hidden (with dropout), then
        # hidden -> classes (is_last disables the layer's post-processing).
        self.layers = nn.ModuleList()
        self.layers.append(
            HyperGCNConv(
                in_channels,
                hid_channels,
                use_mediator,
                use_bn=use_bn,
                drop_rate=drop_rate,
            )
        )
        self.layers.append(
            HyperGCNConv(
                hid_channels, num_classes, use_mediator, use_bn=use_bn, is_last=True
            )
        )
-
-
[docs]defforward(self,data):
-"""
- The data should contain the follows
- data.x: node features
- data.edge_index: edge list (of size (2,|E|)) where data.edge_index[0] contains nodes and data.edge_index[1] contains hyperedges
- !!! Note that self loop should be assigned to a new (hyper)edge id!!!
- !!! Also note that the (hyper)edge id should start at 0 (akin to node id)
- data.norm: The weight for edges in bipartite graphs, correspond to data.edge_index
- !!! Note that we output final node representation. Loss should be defined outside.
- """
- ifself.edge_indexisNone:
- self.edge_index=self.generate_edge_index(data,self.self_loop)
- # print("generate_edge_index:", self.edge_index.shape)
- x,edge_index=data["features"],self.edge_index
- ifdata["weight"]==None:
- norm=torch.ones(edge_index.size()[1])
- else:
- norm=data["weight"]
-
- ifself.LearnMask:
- norm=self.Importance*norm
-
- reversed_edge_index=torch.stack([edge_index[1],edge_index[0]],dim=0)
- ifself.GPR:
- xs=[]
- xs.append(F.relu(self.MLP(x)))
- fori,_inenumerate(self.V2EConvs):
- x=F.relu(self.V2EConvs[i](x,edge_index,norm,self.aggr))
- # x = self.bnV2Es[i](x)
- x=F.dropout(x,p=self.dropout,training=self.training)
- x=self.E2VConvs[i](x,reversed_edge_index,norm,self.aggr)
- x=F.relu(x)
- xs.append(x)
- # x = self.bnE2Vs[i](x)
- x=F.dropout(x,p=self.dropout,training=self.training)
- x=torch.stack(xs,dim=-1)
- x=self.GPRweights(x).squeeze()
- x=self.classifier(x)
- else:
- x=F.dropout(x,p=0.2,training=self.training)# Input dropout
- fori,_inenumerate(self.V2EConvs):
- x=F.relu(self.V2EConvs[i](x,edge_index,norm,self.aggr))
- # x = self.bnV2Es[i](x)
- x=F.dropout(x,p=self.dropout,training=self.training)
- x=F.relu(self.E2VConvs[i](x,reversed_edge_index,norm,self.aggr))
- # x = self.bnE2Vs[i](x)
- x=F.dropout(x,p=self.dropout,training=self.training)
- x=self.classifier(x)
-
- returnx
[docs]classUniGCN(nn.Module):
-r"""The UniGCN model proposed in `UniGNN: a Unified Framework for Graph and Hypergraph Neural Networks <https://arxiv.org/pdf/2105.00956.pdf>`_ paper (IJCAI 2021).
-
- Args:
- ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
- ``hid_channels`` (``int``): :math:`C_{hid}` is the number of hidden channels.
- ``num_classes`` (``int``): The Number of class of the classification task.
- ``use_bn`` (``bool``): If set to ``True``, use batch normalization. Defaults to ``False``.
- ``drop_rate`` (``float``, optional): Dropout ratio. Defaults to ``0.5``.
- """
-
- def__init__(
- self,
- in_channels:int,
- hid_channels:int,
- num_classes:int,
- use_bn:bool=False,
- drop_rate:float=0.5,
- )->None:
- super().__init__()
- self.layers=nn.ModuleList()
- self.layers.append(
- UniGCNConv(in_channels,hid_channels,use_bn=use_bn,drop_rate=drop_rate)
- )
- self.layers.append(
- UniGCNConv(hid_channels,num_classes,use_bn=use_bn,is_last=True)
- )
-
-
[docs]classUniGAT(nn.Module):
-r"""The UniGAT model proposed in `UniGNN: a Unified Framework for Graph and Hypergraph Neural Networks <https://arxiv.org/pdf/2105.00956.pdf>`_ paper (IJCAI 2021).
-
- Args:
- ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
- ``hid_channels`` (``int``): :math:`C_{hid}` is the number of hidden channels.
- ``num_classes`` (``int``): The Number of class of the classification task.
- ``num_heads`` (``int``): The Number of attention head in each layer.
- ``use_bn`` (``bool``): If set to ``True``, use batch normalization. Defaults to ``False``.
- ``drop_rate`` (``float``): The dropout probability. Defaults to ``0.5``.
- ``atten_neg_slope`` (``float``): Hyper-parameter of the ``LeakyReLU`` activation of edge attention. Defaults to 0.2.
- """
-
- def__init__(
- self,
- in_channels:int,
- hid_channels:int,
- num_classes:int,
- num_heads:int,
- use_bn:bool=False,
- drop_rate:float=0.5,
- atten_neg_slope:float=0.2,
- )->None:
- super().__init__()
- self.drop_layer=nn.Dropout(drop_rate)
- self.multi_head_layer=MultiHeadWrapper(
- num_heads,
- "concat",
- UniGATConv,
- in_channels=in_channels,
- out_channels=hid_channels,
- use_bn=use_bn,
- drop_rate=drop_rate,
- atten_neg_slope=atten_neg_slope,
- )
- # The original implementation has applied activation layer after the final layer.
- # Thus, we donot set ``is_last`` to ``True``.
- self.out_layer=UniGATConv(
- hid_channels*num_heads,
- num_classes,
- use_bn=use_bn,
- drop_rate=drop_rate,
- atten_neg_slope=atten_neg_slope,
- is_last=False,
- )
-
-
[docs]classUniSAGE(nn.Module):
-r"""The UniSAGE model proposed in `UniGNN: a Unified Framework for Graph and Hypergraph Neural Networks <https://arxiv.org/pdf/2105.00956.pdf>`_ paper (IJCAI 2021).
-
- Args:
- ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
- ``hid_channels`` (``int``): :math:`C_{hid}` is the number of hidden channels.
- ``num_classes`` (``int``): The Number of class of the classification task.
- ``use_bn`` (``bool``): If set to ``True``, use batch normalization. Defaults to ``False``.
- ``drop_rate`` (``float``, optional): Dropout ratio. Defaults to ``0.5``.
- """
-
- def__init__(
- self,
- in_channels:int,
- hid_channels:int,
- num_classes:int,
- use_bn:bool=False,
- drop_rate:float=0.5,
- )->None:
- super().__init__()
- self.layers=nn.ModuleList()
- self.layers.append(
- UniSAGEConv(in_channels,hid_channels,use_bn=use_bn,drop_rate=drop_rate)
- )
- self.layers.append(
- UniSAGEConv(hid_channels,num_classes,use_bn=use_bn,is_last=True)
- )
-
-
[docs]classUniGIN(nn.Module):
-r"""The UniGIN model proposed in `UniGNN: a Unified Framework for Graph and Hypergraph Neural Networks <https://arxiv.org/pdf/2105.00956.pdf>`_ paper (IJCAI 2021).
-
- Args:
- ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
- ``hid_channels`` (``int``): :math:`C_{hid}` is the number of hidden channels.
- ``num_classes`` (``int``): The Number of class of the classification task.
- ``eps`` (``float``): The epsilon value. Defaults to ``0.0``.
- ``train_eps`` (``bool``): If set to ``True``, the epsilon value will be trainable. Defaults to ``False``.
- ``use_bn`` (``bool``): If set to ``True``, use batch normalization. Defaults to ``False``.
- ``drop_rate`` (``float``, optional): Dropout ratio. Defaults to ``0.5``.
- """
-
- def__init__(
- self,
- in_channels:int,
- hid_channels:int,
- num_classes:int,
- eps:float=0.0,
- train_eps:bool=False,
- use_bn:bool=False,
- drop_rate:float=0.5,
- )->None:
- super().__init__()
- self.layers=nn.ModuleList()
- self.layers.append(
- UniGINConv(
- in_channels,
- hid_channels,
- eps=eps,
- train_eps=train_eps,
- use_bn=use_bn,
- drop_rate=drop_rate,
- )
- )
- self.layers.append(
- UniGINConv(
- hid_channels,
- num_classes,
- eps=eps,
- train_eps=train_eps,
- use_bn=use_bn,
- is_last=True,
- )
- )
-
-
[docs]classMultiHeadWrapper(nn.Module):
-r"""A wrapper to apply multiple heads to a given layer.
-
- Args:
- ``num_heads`` (``int``): The number of heads.
- ``readout`` (``bool``): The readout method. Can be ``"mean"``, ``"max"``, ``"sum"``, or ``"concat"``.
- ``layer`` (``nn.Module``): The layer to apply multiple heads.
- ``**kwargs``: The keyword arguments for the layer.
-
- """
-
- def__init__(
- self,num_heads:int,readout:str,layer:nn.Module,**kwargs
- )->None:
- super().__init__()
- self.layers=nn.ModuleList()
- for_inrange(num_heads):
- self.layers.append(layer(**kwargs))
- self.num_heads=num_heads
- self.readout=readout
-
-
[docs]defforward(self,**kwargs)->torch.Tensor:
-r"""The forward function.
-
- .. note::
- You must explicitly pass the keyword arguments to the layer. For example, if the layer is ``GATConv``, you must pass ``X=X`` and ``g=g``.
- """
- ifself.readout=="concat":
- returntorch.cat([layer(**kwargs)forlayerinself.layers],dim=-1)
- else:
- outs=torch.stack([layer(**kwargs)forlayerinself.layers])
- ifself.readout=="mean":
- returnouts.mean(dim=0)
- elifself.readout=="max":
- returnouts.max(dim=0)[0]
- elifself.readout=="sum":
- returnouts.sum(dim=0)
- else:
- raiseValueError("Unknown readout type")
[docs]classJHConv(nn.Module):
-r"""The Jump Hypergraph Convolution layer proposed in `Dual Channel Hypergraph Collaborative Filtering <https://dl.acm.org/doi/10.1145/3394486.3403253>`_ paper (KDD 2020).
-
- Matrix Format:
-
- .. math::
- \mathbf{X}^{\prime} = \sigma \left( \mathbf{D}_v^{-\frac{1}{2}} \mathbf{H} \mathbf{W}_e \mathbf{D}_e^{-1}
- \mathbf{H}^\top \mathbf{D}_v^{-\frac{1}{2}} \mathbf{X} \mathbf{\Theta} + \mathbf{X} \right).
-
- Args:
- ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
- ``out_channels`` (int): :math:`C_{out}` is the number of output channels.
- ``bias`` (``bool``): If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
- ``use_bn`` (``bool``): If set to ``True``, the layer will use batch normalization. Defaults to ``False``.
- ``drop_rate`` (``float``): If set to a positive number, the layer will use dropout. Defaults to ``0.5``.
- ``is_last`` (``bool``): If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``.
- """
-
- def__init__(
- self,
- in_channels:int,
- out_channels:int,
- bias:bool=True,
- use_bn:bool=False,
- drop_rate:float=0.5,
- is_last:bool=False,
- ):
- super().__init__()
- self.is_last=is_last
- self.bn=nn.BatchNorm1d(out_channels)ifuse_bnelseNone
- self.act=nn.ReLU(inplace=True)
- self.drop=nn.Dropout(drop_rate)
- self.theta=nn.Linear(in_channels,out_channels,bias=bias)
-
-
[docs]defaggregate(self,inputs,index,dim_size=None,aggr="sum"):
-r"""Aggregates messages from neighbors as
- :math:`\square_{j \in \mathcal{N}(i)}`.
-
- Takes in the output of message computation as first argument and any
- argument which was initially passed to :meth:`propagate`.
-
- By default, this function will delegate its call to scatter functions
- that support "add", "mean" and "max" operations as specified in
- :meth:`__init__` by the :obj:`aggr` argument.
- """
- # ipdb.set_trace()
-
- returnscatter(inputs,index,dim=self.node_dim,reduce=aggr)
[docs]classHGNNConv(nn.Module):
-r"""The HGNN convolution layer proposed in `Hypergraph Neural Networks <https://arxiv.org/pdf/1809.09401>`_ paper (AAAI 2019).
- Matrix Format:
-
- .. math::
- \mathbf{X}^{\prime} = \sigma \left( \mathbf{D}_v^{-\frac{1}{2}} \mathbf{H} \mathbf{W}_e \mathbf{D}_e^{-1}
- \mathbf{H}^\top \mathbf{D}_v^{-\frac{1}{2}} \mathbf{X} \mathbf{\Theta} \right).
-
- where :math:`\mathbf{X}` is the input vertex feature matrix, :math:`\mathbf{H}` is the hypergraph incidence matrix,
- :math:`\mathbf{W}_e` is a diagonal hyperedge weight matrix, :math:`\mathbf{D}_v` is a diagonal vertex degree matrix,
- :math:`\mathbf{D}_e` is a diagonal hyperedge degree matrix, :math:`\mathbf{\Theta}` is the learnable parameters.
-
- Args:
- ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
- ``out_channels`` (int): :math:`C_{out}` is the number of output channels.
- ``bias`` (``bool``): If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
- ``use_bn`` (``bool``): If set to ``True``, the layer will use batch normalization. Defaults to ``False``.
- ``drop_rate`` (``float``): If set to a positive number, the layer will use dropout. Defaults to ``0.5``.
- ``is_last`` (``bool``): If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``.
- """
-
- def__init__(
- self,
- in_channels:int,
- out_channels:int,
- bias:bool=True,
- use_bn:bool=False,
- drop_rate:float=0.5,
- is_last:bool=False,
- ):
- super().__init__()
- self.is_last=is_last
- self.bn=nn.BatchNorm1d(out_channels)ifuse_bnelseNone
- self.act=nn.ReLU(inplace=True)
- self.drop=nn.Dropout(drop_rate)
- self.theta=nn.Linear(in_channels,out_channels,bias=bias)
-
-
[docs]classHGNNPConv(nn.Module):
-r"""The HGNN :sup:`+` convolution layer proposed in `HGNN+: General Hypergraph Neural Networks <https://ieeexplore.ieee.org/document/9795251>`_ paper (IEEE T-PAMI 2022).
-
- Sparse Format:
-
- .. math::
-
- \left\{
- \begin{aligned}
- m_{\beta}^{t} &=\sum_{\alpha \in \mathcal{N}_{v}(\beta)} M_{v}^{t}\left(x_{\alpha}^{t}\right) \\
- y_{\beta}^{t} &=U_{e}^{t}\left(w_{\beta}, m_{\beta}^{t}\right) \\
- m_{\alpha}^{t+1} &=\sum_{\beta \in \mathcal{N}_{e}(\alpha)} M_{e}^{t}\left(x_{\alpha}^{t}, y_{\beta}^{t}\right) \\
- x_{\alpha}^{t+1} &=U_{v}^{t}\left(x_{\alpha}^{t}, m_{\alpha}^{t+1}\right) \\
- \end{aligned}
- \right.
-
- Matrix Format:
-
- .. math::
- \mathbf{X}^{\prime} = \sigma \left( \mathbf{D}_v^{-1} \mathbf{H} \mathbf{W}_e
- \mathbf{D}_e^{-1} \mathbf{H}^\top \mathbf{X} \mathbf{\Theta} \right).
-
- Args:
- ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
- ``out_channels`` (int): :math:`C_{out}` is the number of output channels.
- ``bias`` (``bool``): If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
- ``use_bn`` (``bool``): If set to ``True``, the layer will use batch normalization. Defaults to ``False``.
- ``drop_rate`` (``float``): If set to a positive number, the layer will use dropout. Defaults to ``0.5``.
- ``is_last`` (``bool``): If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``.
- """
-
- def__init__(
- self,
- in_channels:int,
- out_channels:int,
- bias:bool=True,
- use_bn:bool=False,
- drop_rate:float=0.5,
- is_last:bool=False,
- ):
- super().__init__()
- self.is_last=is_last
- self.bn=nn.BatchNorm1d(out_channels)ifuse_bnelseNone
- self.act=nn.ReLU(inplace=True)
- self.drop=nn.Dropout(drop_rate)
- self.theta=nn.Linear(in_channels,out_channels,bias=bias)
-
-
[docs]classHNHNConv(nn.Module):
-r"""The HNHN convolution layer proposed in `HNHN: Hypergraph Networks with Hyperedge Neurons <https://arxiv.org/pdf/2006.12278.pdf>`_ paper (ICML 2020).
-
- Args:
- ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
- ``out_channels`` (int): :math:`C_{out}` is the number of output channels.
- ``bias`` (``bool``): If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
- ``use_bn`` (``bool``): If set to ``True``, the layer will use batch normalization. Defaults to ``False``.
- ``drop_rate`` (``float``): If set to a positive number, the layer will use dropout. Defaults to ``0.5``.
- ``is_last`` (``bool``): If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``.
- """
-
- def__init__(
- self,
- in_channels:int,
- out_channels:int,
- bias:bool=True,
- use_bn:bool=False,
- drop_rate:float=0.5,
- is_last:bool=False,
- ):
- super().__init__()
- self.is_last=is_last
- self.bn=nn.BatchNorm1d(out_channels)ifuse_bnelseNone
- self.act=nn.ReLU(inplace=True)
- self.drop=nn.Dropout(drop_rate)
- self.theta_v2e=nn.Linear(in_channels,out_channels,bias=bias)
- self.theta_e2v=nn.Linear(out_channels,out_channels,bias=bias)
-
-
[docs]defforward(self,X:torch.Tensor,hg:Hypergraph)->torch.Tensor:
-r"""The forward function.
-
- Args:
- X (``torch.Tensor``): Input vertex feature matrix. Size :math:`(|\mathcal{V}|, C_{in})`.
- hg (``dhg.Hypergraph``): The hypergraph structure that contains :math:`|\mathcal{V}|` vertices.
- """
- # v -> e
- X=self.theta_v2e(X)
- ifself.bnisnotNone:
- X=self.bn(X)
- Y=self.act(hg.v2e(X,aggr="mean"))
- # e -> v
- Y=self.theta_e2v(Y)
- X=hg.e2v(Y,aggr="mean")
- ifnotself.is_last:
- X=self.drop(self.act(X))
- returnX
[docs]classHyperGCNConv(nn.Module):
-r"""The HyperGCN convolution layer proposed in `HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs <https://papers.nips.cc/paper/2019/file/1efa39bcaec6f3900149160693694536-Paper.pdf>`_ paper (NeurIPS 2019).
-
- Parameters:
- ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
- ``out_channels`` (int): :math:`C_{out}` is the number of output channels.
- ``use_mediator`` (``str``): Whether to use mediator to transform the hyperedges to edges in the graph. Defaults to ``False``.
- ``bias`` (``bool``): If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
- ``use_bn`` (``bool``): If set to ``True``, the layer will use batch normalization. Defaults to ``False``.
- ``drop_rate`` (``float``): If set to a positive number, the layer will use dropout. Defaults to ``0.5``.
- ``is_last`` (``bool``): If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``.
- """
-
- def__init__(
- self,
- in_channels:int,
- out_channels:int,
- use_mediator:bool=False,
- bias:bool=True,
- use_bn:bool=False,
- drop_rate:float=0.5,
- is_last:bool=False,
- ):
- super().__init__()
- self.is_last=is_last
- self.bn=nn.BatchNorm1d(out_channels)ifuse_bnelseNone
- self.use_mediator=use_mediator
- self.act=nn.ReLU(inplace=True)
- self.drop=nn.Dropout(drop_rate)
- self.theta=nn.Linear(in_channels,out_channels,bias=bias)
-
-
[docs]defforward(
- self,X:torch.Tensor,hg:Hypergraph,cached_g:Optional[Graph]=None
- )->torch.Tensor:
-r"""The forward function.
-
- Parameters:
- ``X`` (``torch.Tensor``): Input vertex feature matrix. Size :math:`(N, C_{in})`.
- ``hg`` (``eg.Hypergraph``): The hypergraph structure that contains :math:`N` vertices.
- ``cached_g`` (``eg.Graph``): The pre-transformed graph structure from the hypergraph structure that contains :math:`N` vertices. If not provided, the graph structure will be transformed for each forward time. Defaults to ``None``.
- """
- X=self.theta(X)
- ifself.bnisnotNone:
- X=self.bn(X)
- ifcached_gisNone:
- g=Graph.from_hypergraph_hypergcn(hg,X,self.use_mediator)
- X=g.smoothing_with_GCN(X)
- else:
- X=cached_g.smoothing_with_GCN(X)
- ifnotself.is_last:
- X=self.drop(self.act(X))
- returnX
[docs]classUniGCNConv(nn.Module):
-r"""The UniGCN convolution layer proposed in `UniGNN: a Unified Framework for Graph and Hypergraph Neural Networks <https://arxiv.org/pdf/2105.00956.pdf>`_ paper (IJCAI 2021).
-
- Sparse Format:
-
- .. math::
- \left\{
- \begin{aligned}
- h_{e} &= \frac{1}{|e|} \sum_{j \in e} x_{j} \\
- \tilde{x}_{i} &= \frac{1}{\sqrt{d_{i}}} \sum_{e \in \tilde{E}_{i}} \frac{1}{\sqrt{\tilde{d}_{e}}} W h_{e}
- \end{aligned}
- \right. .
-
- where :math:`\tilde{d}_{e} = \frac{1}{|e|} \sum_{i \in e} d_{i}`.
-
- Matrix Format:
-
- .. math::
- \mathbf{X}^{\prime} = \sigma \left( \mathbf{D}_v^{-\frac{1}{2}} \mathbf{H} \tilde{\mathbf{D}}_e^{-\frac{1}{2}} \cdot \mathbf{D}_e^{-1} \mathbf{H}^\top \mathbf{X} \mathbf{\Theta} \right) .
-
- Parameters:
- ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
- ``out_channels`` (int): :math:`C_{out}` is the number of output channels.
- ``bias`` (``bool``): If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
- ``use_bn`` (``bool``): If set to ``True``, the layer will use batch normalization. Defaults to ``False``.
- ``drop_rate`` (``float``): If set to a positive number, the layer will use dropout. Defaults to ``0.5``.
- ``is_last`` (``bool``): If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``.
- """
-
- def__init__(
- self,
- in_channels:int,
- out_channels:int,
- bias:bool=True,
- use_bn:bool=False,
- drop_rate:float=0.5,
- is_last:bool=False,
- ):
- super().__init__()
- self.is_last=is_last
- self.bn=nn.BatchNorm1d(out_channels)ifuse_bnelseNone
- self.act=nn.ReLU(inplace=True)
- self.drop=nn.Dropout(drop_rate)
- self.theta=nn.Linear(in_channels,out_channels,bias=bias)
-
-
[docs]classUniGATConv(nn.Module):
-r"""The UniGAT convolution layer proposed in `UniGNN: a Unified Framework for Graph and Hypergraph Neural Networks <https://arxiv.org/pdf/2105.00956.pdf>`_ paper (IJCAI 2021).
-
- Sparse Format:
-
- .. math::
- \left\{
- \begin{aligned}
- \alpha_{i e} &=\sigma\left(a^{T}\left[W h_{\{i\}} ; W h_{e}\right]\right) \\
- \tilde{\alpha}_{i e} &=\frac{\exp \left(\alpha_{i e}\right)}{\sum_{e^{\prime} \in \tilde{E}_{i}} \exp \left(\alpha_{i e^{\prime}}\right)} \\
- \tilde{x}_{i} &=\sum_{e \in \tilde{E}_{i}} \tilde{\alpha}_{i e} W h_{e}
- \end{aligned}
- \right. .
-
- Parameters:
- ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
- ``out_channels`` (int): :math:`C_{out}` is the number of output channels.
- ``bias`` (``bool``): If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
- ``use_bn`` (``bool``): If set to ``True``, the layer will use batch normalization. Defaults to ``False``.
- ``drop_rate`` (``float``): The dropout probability. If ``dropout <= 0``, the layer will not drop values. Defaults to ``0.5``.
- ``atten_neg_slope`` (``float``): Hyper-parameter of the ``LeakyReLU`` activation of edge attention. Defaults to ``0.2``.
- ``is_last`` (``bool``): If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``.
- """
-
- def__init__(
- self,
- in_channels:int,
- out_channels:int,
- bias:bool=True,
- use_bn:bool=False,
- drop_rate:float=0.5,
- atten_neg_slope:float=0.2,
- is_last:bool=False,
- ):
- super().__init__()
- self.is_last=is_last
- self.bn=nn.BatchNorm1d(out_channels)ifuse_bnelseNone
- self.atten_dropout=nn.Dropout(drop_rate)
- self.atten_act=nn.LeakyReLU(atten_neg_slope)
- self.act=nn.ELU(inplace=True)
- self.theta=nn.Linear(in_channels,out_channels,bias=bias)
- self.atten_e=nn.Linear(out_channels,1,bias=False)
- self.atten_dst=nn.Linear(out_channels,1,bias=False)
-
-
[docs]defforward(self,X:torch.Tensor,hg:Hypergraph)->torch.Tensor:
-r"""The forward function.
-
- Parameters:
- X (``torch.Tensor``): Input vertex feature matrix. Size :math:`(|\mathcal{V}|, C_{in})`.
- hg (``eg.Hypergraph``): The hypergraph structure that contains :math:`|\mathcal{V}|` vertices.
- """
- X=self.theta(X)
- Y=hg.v2e(X,aggr="mean")
- # ===============================================
- # alpha_e = self.atten_e(Y)
- # e_atten_score = alpha_e[hg.e2v_src]
- # e_atten_score = self.atten_dropout(self.atten_act(e_atten_score).squeeze())
-
- e_atten_score=self.atten_dropout(
- self.atten_act(self.atten_e(Y)[hg.e2v_src]).squeeze()
- )
-
- # ================================================================================
- # We suggest to add a clamp on attention weight to avoid Nan error in training.
- e_atten_score.clamp_(min=0.001,max=5)
- # ================================================================================
- X=hg.e2v(Y,aggr="softmax_then_sum",e2v_weight=e_atten_score)
-
- ifnotself.is_last:
- X=self.act(X)
- ifself.bnisnotNone:
- X=self.bn(X)
- returnX
-
-
-
[docs]classUniSAGEConv(nn.Module):
-r"""The UniSAGE convolution layer proposed in `UniGNN: a Unified Framework for Graph and Hypergraph Neural Networks <https://arxiv.org/pdf/2105.00956.pdf>`_ paper (IJCAI 2021).
-
- Sparse Format:
-
- .. math::
- \left\{
- \begin{aligned}
- h_{e} &= \frac{1}{|e|} \sum_{j \in e} x_{j} \\
- \tilde{x}_{i} &= W\left(x_{i}+\text { AGGREGATE }\left(\left\{x_{j}\right\}_{j \in \mathcal{N}_{i}}\right)\right)
- \end{aligned}
- \right. .
-
- Matrix Format:
-
- .. math::
- \mathbf{X}^{\prime} = \sigma \left( \left( \mathbf{I} + \mathbf{H} \mathbf{D}_e^{-1} \mathbf{H}^\top \right) \mathbf{X} \mathbf{\Theta} \right) .
-
- Parameters:
- ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
- ``out_channels`` (int): :math:`C_{out}` is the number of output channels.
- ``bias`` (``bool``): If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
- ``use_bn`` (``bool``): If set to ``True``, the layer will use batch normalization. Defaults to ``False``.
- ``drop_rate`` (``float``): If set to a positive number, the layer will use dropout. Defaults to ``0.5``.
- ``is_last`` (``bool``): If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``.
- """
-
- def__init__(
- self,
- in_channels:int,
- out_channels:int,
- bias:bool=True,
- use_bn:bool=False,
- drop_rate:float=0.5,
- is_last:bool=False,
- ):
- super().__init__()
- self.is_last=is_last
- self.bn=nn.BatchNorm1d(out_channels)ifuse_bnelseNone
- self.act=nn.ReLU(inplace=True)
- self.drop=nn.Dropout(drop_rate)
- self.theta=nn.Linear(in_channels,out_channels,bias=bias)
-
-
[docs]classUniGINConv(nn.Module):
-r"""The UniGIN convolution layer proposed in `UniGNN: a Unified Framework for Graph and Hypergraph Neural Networks <https://arxiv.org/pdf/2105.00956.pdf>`_ paper (IJCAI 2021).
-
- Sparse Format:
-
- .. math::
-
- \left\{
- \begin{aligned}
- h_{e} &= \frac{1}{|e|} \sum_{j \in e} x_{j} \\
- \tilde{x}_{i} &= W\left((1+\varepsilon) x_{i}+\sum_{e \in E_{i}} h_{e}\right)
- \end{aligned}
- \right. .
-
- Matrix Format:
-
- .. math::
- \mathbf{X}^{\prime} = \sigma \left( \left( \left( \mathbf{I} + \varepsilon \right) + \mathbf{H} \mathbf{D}_e^{-1} \mathbf{H}^\top \right) \mathbf{X} \mathbf{\Theta} \right) .
-
- Parameters:
- ``in_channels`` (``int``): :math:`C_{in}` is the number of input channels.
- ``out_channels`` (int): :math:`C_{out}` is the number of output channels.
- ``eps`` (``float``): :math:`\varepsilon` is the learnable parameter. Defaults to ``0.0``.
- ``train_eps`` (``bool``): If set to ``True``, the layer will learn the :math:`\varepsilon` parameter. Defaults to ``False``.
- ``bias`` (``bool``): If set to ``False``, the layer will not learn the bias parameter. Defaults to ``True``.
- ``use_bn`` (``bool``): If set to ``True``, the layer will use batch normalization. Defaults to ``False``.
- ``drop_rate`` (``float``): If set to a positive number, the layer will use dropout. Defaults to ``0.5``.
- ``is_last`` (``bool``): If set to ``True``, the layer will not apply the final activation and dropout functions. Defaults to ``False``.
- """
-
- def__init__(
- self,
- in_channels:int,
- out_channels:int,
- eps:float=0.0,
- train_eps:bool=False,
- bias:bool=True,
- use_bn:bool=False,
- drop_rate:float=0.5,
- is_last:bool=False,
- ):
- super().__init__()
- self.is_last=is_last
- iftrain_eps:
- self.eps=nn.Parameter(torch.tensor([eps]))
- else:
- self.eps=eps
- self.bn=nn.BatchNorm1d(out_channels)ifuse_bnelseNone
- self.act=nn.ReLU(inplace=True)
- self.drop=nn.Dropout(drop_rate)
- self.theta=nn.Linear(in_channels,out_channels,bias=bias)
-
-
[docs]defforward(
- self,x,edge_index:Adj,size:Size=None,return_attention_weights=None
- ):
-r"""
- Args:
- return_attention_weights (bool, optional): If set to :obj:`True`,
- will additionally return the tuple
- :obj:`(edge_index, attention_weights)`, holding the computed
- attention weights for each edge. (default: :obj:`None`)
- """
- H,C=self.heads,self.hidden
-
- x_l:OptTensor=None
- x_r:OptTensor=None
- alpha_l:OptTensor=None
- alpha_r:OptTensor=None
- ifisinstance(x,Tensor):
- assertx.dim()==2,"Static graphs not supported in `GATConv`."
- x_K=self.lin_K(x).view(-1,H,C)
- x_V=self.lin_V(x).view(-1,H,C)
- alpha_r=(x_K*self.att_r).sum(dim=-1)
-
- out=self.propagate(edge_index,x=x_V,alpha=alpha_r,aggr=self.aggr)
-
- alpha=self._alpha
- self._alpha=None
-
- # Note that in the original code of GMT paper, they do not use additional W^O to combine heads.
- # This is because O = softmax(QK^T)V and V = V_in*W^V. So W^O can be effectively taken care by W^V!!!
- out+=self.att_r# This is Seed + Multihead
- # concat heads then LayerNorm. Z (rhs of Eq(7)) in GMT paper.
- out=self.ln0(out.view(-1,self.heads*self.hidden))
- # rFF and skip connection. Lhs of eq(7) in GMT paper.
- out=self.ln1(out+F.relu(self.rFF(out)))
-
- ifisinstance(return_attention_weights,bool):
- assertalphaisnotNone
- ifisinstance(edge_index,Tensor):
- returnout,(edge_index,alpha)
- elifisinstance(edge_index,SparseTensor):
- returnout,edge_index.set_value(alpha,layout="coo")
- else:
- returnout
[docs]defaggregate(self,inputs,index,dim_size=None,aggr="add"):
-r"""Aggregates messages from neighbors as
- :math:`\square_{j \in \mathcal{N}(i)}`.
-
- Takes in the output of message computation as first argument and any
- argument which was initially passed to :meth:`propagate`.
-
- By default, this function will delegate its call to scatter functions
- that support "add", "mean" and "max" operations as specified in
- :meth:`__init__` by the :obj:`aggr` argument.
- """
- # ipdb.set_trace()
- ifaggrisNone:
- raiseValueError("aggr was not passed!")
- returnscatter(inputs,index,dim=self.node_dim,reduce=aggr)
[docs]classBPRLoss(nn.Module):
-r"""This criterion computes the Bayesian Personalized Ranking (BPR) loss between the positive scores and the negative scores.
-
- Args:
- ``alpha`` (``float``, optional): The weight for the positive scores in the BPR loss. Defaults to ``1.0``.
- ``beta`` (``float``, optional): The weight for the negative scores in the BPR loss. Defaults to ``1.0``.
- ``activation`` (``str``, optional): The activation function to use can be one of ``"sigmoid_then_log"``, ``"softplus"``. Defaults to ``"sigmoid_then_log"``.
- """
-
- def__init__(
- self,
- alpha:float=1.0,
- beta:float=1.0,
- activation:str="sigmoid_then_log",
- ):
- super().__init__()
- assertactivationin(
- "sigmoid_then_log",
- "softplus",
- ),"activation function of BPRLoss must be sigmoid_then_log or softplus."
- self.activation=activation
- self.alpha=alpha
- self.beta=beta
-
-
[docs]defforward(self,pos_scores:torch.Tensor,neg_scores:torch.Tensor):
-r"""The forward function of BPRLoss.
-
- Args:
- ``pos_scores`` (``torch.Tensor``): The positive scores.
- ``neg_scores`` (``torch.Tensor``): The negative scores.
- """
- ifself.activation=="sigmoid_then_log":
- loss=-(self.alpha*pos_scores-self.beta*neg_scores).sigmoid().log()
- elifself.activation=="softplus":
- loss=F.softplus(self.beta*neg_scores-self.alpha*pos_scores)
- else:
- raiseNotImplementedError
- returnloss.mean()
[docs]classEmbeddingRegularization(nn.Module):
-r"""Regularization function for embeddings.
-
- Args:
- ``p`` (``int``): The power to use in the regularization. Defaults to ``2``.
- ``weight_decay`` (``float``): The weight of the regularization. Defaults to ``1e-4``.
- """
-
- def__init__(self,p:int=2,weight_decay:float=1e-4):
- super().__init__()
- self.p=p
- self.weight_decay=weight_decay
-
-
def parse_edgelist(
    lines, comments="#", delimiter=None, create_using=None, nodetype=None, data=True
):
    """Parse lines of an edge list representation of a graph.

    Parameters
    ----------
    lines : list or iterator of strings
        Input data in edgelist format.
    comments : string, optional
        Marker for comment lines. Default is ``'#'``. Use ``comments=None``
        to disable comment stripping entirely.
    delimiter : string, optional
        Separator for node labels. Default is ``None`` (any whitespace).
    create_using : EasyGraph graph constructor, optional (default=eg.Graph)
        Graph type to create. If a graph instance, it is cleared before
        being populated.
    nodetype : Python type, optional
        Convert nodes to this type. Default is ``None`` (no conversion).
    data : bool or list of (label, type) tuples
        If ``False``, ignore edge data. If ``True``, parse the trailing
        fields as a Python dictionary literal. Otherwise a sequence of
        ``(key, type)`` pairs naming and typing each trailing field.

    Returns
    -------
    G : EasyGraph Graph
        The graph corresponding to ``lines``.

    Raises
    ------
    TypeError
        If node or edge-data conversion fails.
    IndexError
        If the number of data fields does not match ``data``.

    Examples
    --------
    >>> lines = ["1 2", "2 3", "3 4"]
    >>> G = eg.parse_edgelist(lines, nodetype=int)
    >>> list(G.edges)
    [(1, 2), (2, 3), (3, 4)]

    See Also
    --------
    read_weighted_edgelist
    """
    from ast import literal_eval

    G = eg.empty_graph(0, create_using)
    for line in lines:
        # Strip everything from the comment marker onward.
        if comments is not None:
            cut = line.find(comments)
            if cut >= 0:
                line = line[:cut]
        if not line:
            continue
        fields = line.strip().split(delimiter)
        # An edge needs at least a source and a target.
        if len(fields) < 2:
            continue
        u, v, *rest = fields
        if nodetype is not None:
            try:
                u = nodetype(u)
                v = nodetype(v)
            except Exception as err:
                raise TypeError(
                    f"Failed to convert nodes {u},{v} to type {nodetype}."
                ) from err

        if len(rest) == 0 or data is False:
            # No trailing fields, or caller asked to ignore them.
            edgedata = {}
        elif data is True:
            # Trailing fields form a Python dict literal; rejoin them with
            # the delimiter-appropriate separator before evaluating.
            joiner = "," if delimiter == "," else " "
            try:
                edgedata = dict(literal_eval(joiner.join(rest).strip()))
            except Exception as err:
                raise TypeError(
                    f"Failed to convert edge data ({rest}) to dictionary."
                ) from err
        else:
            # Trailing fields are positional values for the declared keys.
            if len(rest) != len(data):
                raise IndexError(
                    f"Edge data {rest} and data_keys {data} are not the same length"
                )
            edgedata = {}
            for (edge_key, edge_type), edge_value in zip(data, rest):
                try:
                    edge_value = edge_type(edge_value)
                except Exception as err:
                    raise TypeError(
                        f"Failed to convert {edge_key} data {edge_value} "
                        f"to type {edge_type}."
                    ) from err
                edgedata[edge_key] = edge_value
        G.add_edge(u, v, **edgedata)
    return G
-
-
-
def generate_edgelist(G, delimiter=" ", data=True):
    """Generate the graph ``G`` line by line in edge list format.

    Parameters
    ----------
    G : EasyGraph graph
    delimiter : string, optional
        Separator placed between the fields of each line.
    data : bool or list of keys
        If ``False``, emit no edge data. If ``True``, append the edge-data
        dictionary to each line. If a list of keys, append the corresponding
        data values (the line is truncated at the first missing key).

    Yields
    ------
    line : string
        One edge per line in edgelist format.

    Examples
    --------
    >>> G = eg.lollipop_graph(4, 3)
    >>> G[1][2]["weight"] = 3
    >>> for line in eg.generate_edgelist(G, data=["weight"]):
    ...     print(line)  # doctest: +SKIP

    See Also
    --------
    write_adjlist, read_adjlist
    """
    edges = G.edges
    # Multigraph edge tuples carry (u, v, key, data); drop the key so the
    # branches below see uniform 3-tuples.
    if edges and len(edges[0]) > 3:
        edges = ((u, v, d) for u, v, _, d in edges)
    if data is True:
        for u, v, d in edges:
            yield delimiter.join(map(str, (u, v, dict(d))))
    elif data is False:
        for u, v, _ in edges:
            yield delimiter.join(map(str, (u, v)))
    else:
        for u, v, d in edges:
            fields = [u, v]
            try:
                fields.extend(d[k] for k in data)
            except KeyError:
                pass  # edge lacks one of the requested keys; emit what we have
            yield delimiter.join(map(str, fields))
-
-
-
@open_file(1, mode="wb")
def write_edgelist(G, path, comments="#", delimiter=" ", data=True, encoding="utf-8"):
    """Write the graph ``G`` as a list of edges.

    Parameters
    ----------
    G : graph
        An EasyGraph graph.
    path : file or string
        File or filename to write. A file object must be opened in ``'wb'``
        mode. Filenames ending in ``.gz`` or ``.bz2`` will be compressed.
    comments : string, optional
        The character used to indicate the start of a comment.
    delimiter : string, optional
        The string used to separate values. The default is whitespace.
    data : bool or list, optional
        If ``False``, write no edge data. If ``True``, write a string
        representation of each edge-data dictionary. If a list, write only
        the listed keys' values.
    encoding : string, optional
        Text encoding used when writing the file.

    Examples
    --------
    >>> G = eg.path_graph(4)
    >>> eg.write_edgelist(G, "test.edgelist")
    >>> eg.write_edgelist(G, "test.edgelist.gz", data=False)

    See Also
    --------
    read_edgelist
    write_weighted_edgelist
    """
    # generate_edgelist does the formatting; this function only encodes
    # and appends the newline.
    for line in generate_edgelist(G, delimiter, data):
        path.write((line + "\n").encode(encoding))
-
-
-
@open_file(0, mode="rb")
def read_edgelist(
    path,
    comments="#",
    delimiter=None,
    create_using=None,
    nodetype=None,
    data=True,
    edgetype=None,
    encoding="utf-8",
):
    """Read a graph from a list of edges.

    Parameters
    ----------
    path : file or string
        File or filename to read. A file object must be opened in ``'rb'``
        mode. Filenames ending in ``.gz`` or ``.bz2`` will be uncompressed.
    comments : string, optional
        The character used to indicate the start of a comment; use
        ``comments=None`` to disable comment handling.
    delimiter : string, optional
        The string used to separate values. The default is whitespace.
    create_using : EasyGraph graph constructor, optional (default=eg.Graph)
        Graph type to create. If a graph instance, it is cleared before
        being populated.
    nodetype : int, float, str, Python type, optional
        Convert node data from strings to the specified type.
    data : bool or list of (label, type) tuples
        Tuples specifying dictionary key names and types for edge data.
    edgetype : int, float, str, Python type, optional OBSOLETE
        Convert edge data from strings to the specified type and use as
        'weight'.
    encoding : string, optional
        Text encoding used when reading the file.

    Returns
    -------
    G : graph
        An EasyGraph Graph or the type specified with ``create_using``.

    Examples
    --------
    >>> eg.write_edgelist(eg.path_graph(4), "test.edgelist")
    >>> G = eg.read_edgelist("test.edgelist", nodetype=int)

    See parse_edgelist() for more examples of formatting.

    See Also
    --------
    parse_edgelist
    write_edgelist

    Notes
    -----
    Since nodes must be hashable, ``nodetype`` must return hashable types
    (e.g. int, float, str, frozenset - or tuples of those, etc.)
    """
    # path may yield bytes (binary/compressed file) or str; normalize to str.
    decoded_lines = (
        line if isinstance(line, str) else line.decode(encoding) for line in path
    )
    return parse_edgelist(
        decoded_lines,
        comments=comments,
        delimiter=delimiter,
        create_using=create_using,
        nodetype=nodetype,
        data=data,
    )
-
-
-
def write_weighted_edgelist(G, path, comments="#", delimiter=" ", encoding="utf-8"):
    """Write the graph ``G`` as a list of edges with numeric weights.

    Thin wrapper around :func:`write_edgelist` with ``data=("weight",)``.

    Parameters
    ----------
    G : graph
        An EasyGraph graph.
    path : file or string
        File or filename to write. A file object must be opened in ``'wb'``
        mode. Filenames ending in ``.gz`` or ``.bz2`` will be compressed.
    comments : string, optional
        The character used to indicate the start of a comment.
    delimiter : string, optional
        The string used to separate values. The default is whitespace.
    encoding : string, optional
        Text encoding used when writing the file.

    Examples
    --------
    >>> G = eg.Graph()
    >>> G.add_edge(1, 2, weight=7)
    >>> eg.write_weighted_edgelist(G, "test.weighted.edgelist")

    See Also
    --------
    read_edgelist
    write_edgelist
    read_weighted_edgelist
    """
    write_edgelist(
        G,
        path,
        comments=comments,
        delimiter=delimiter,
        data=("weight",),
        encoding=encoding,
    )
-
-
-
def read_weighted_edgelist(
    path,
    comments="#",
    delimiter=None,
    create_using=None,
    nodetype=None,
    encoding="utf-8",
):
    """Read a graph from an edge list whose third field is a numeric weight.

    Thin wrapper around :func:`read_edgelist` with
    ``data=(("weight", float),)``.

    Parameters
    ----------
    path : file or string
        File or filename to read. A file object must be opened in ``'rb'``
        mode. Filenames ending in ``.gz`` or ``.bz2`` will be uncompressed.
    comments : string, optional
        The character used to indicate the start of a comment.
    delimiter : string, optional
        The string used to separate values. The default is whitespace.
    create_using : EasyGraph graph constructor, optional (default=eg.Graph)
        Graph type to create. If a graph instance, it is cleared before
        being populated.
    nodetype : int, float, str, Python type, optional
        Convert node data from strings to the specified type.
    encoding : string, optional
        Text encoding used when reading the file.

    Returns
    -------
    G : graph
        An EasyGraph Graph or the type specified with ``create_using``.

    Notes
    -----
    Since nodes must be hashable, ``nodetype`` must return hashable types
    (e.g. int, float, str, frozenset - or tuples of those, etc.)

    Example edgelist file format, with numeric edge data::

        # source target data
        a b 1
        a c 3.14159
        d e 42

    See Also
    --------
    write_weighted_edgelist
    """
    return read_edgelist(
        path,
        comments=comments,
        delimiter=delimiter,
        create_using=create_using,
        nodetype=nodetype,
        data=(("weight", float),),
        encoding=encoding,
    )
def write_gexf(G, path, encoding="utf-8", prettyprint=True, version="1.2draft"):
    """Write ``G`` in GEXF format to ``path``.

    "GEXF (Graph Exchange XML Format) is a language for describing
    complex networks structures, their associated data and dynamics" [1]_.

    Node attributes are checked against the GEXF schema of the selected
    ``version`` for the non-user-defined parts, e.g. visualization 'viz'.

    Parameters
    ----------
    G : graph
        An EasyGraph graph.
    path : file or string
        File or file name to write. File names ending in ``.gz`` or
        ``.bz2`` will be compressed.
    encoding : string (optional, default: 'utf-8')
        Encoding for text data.
    prettyprint : bool (optional, default: True)
        If True use line breaks and indenting in the output XML.
    version : string (optional, default: '1.2draft')
        The GEXF version used for node-attribute checking.

    Examples
    --------
    >>> G = eg.path_graph(4)
    >>> eg.write_gexf(G, "test.gexf")

    References
    ----------
    .. [1] GEXF File Format, https://gephi.org/gexf/format/
    """
    gexf_writer = GEXFWriter(
        encoding=encoding, prettyprint=prettyprint, version=version
    )
    gexf_writer.add_graph(G)
    gexf_writer.write(path)
-
-
-
def generate_gexf(G, encoding="utf-8", prettyprint=True, version="1.2draft"):
    """Generate lines of a GEXF format representation of ``G``.

    "GEXF (Graph Exchange XML Format) is a language for describing
    complex networks structures, their associated data and dynamics" [1]_.

    Parameters
    ----------
    G : graph
        An EasyGraph graph.
    encoding : string (optional, default: 'utf-8')
        Encoding for text data.
    prettyprint : bool (optional, default: True)
        If True use line breaks and indenting in the output XML.
    version : string (default: 1.2draft)
        Version of the GEXF file format (see http://gexf.net/schema.html).
        Supported values: "1.1draft", "1.2draft".

    Yields
    ------
    line : string
        One line of the GEXF representation.

    Examples
    --------
    >>> G = eg.path_graph(4)
    >>> s = chr(10).join(eg.generate_gexf(G))

    Notes
    -----
    This implementation does not support mixed graphs (directed and
    undirected edges together).

    The node id attribute defaults to the string of the node label; set
    ``node['a']['id'] = 1`` to override it for node ``'a'``.

    References
    ----------
    .. [1] GEXF File Format, https://gephi.org/gexf/format/
    """
    gexf_writer = GEXFWriter(
        encoding=encoding, prettyprint=prettyprint, version=version
    )
    gexf_writer.add_graph(G)
    yield from str(gexf_writer).splitlines()
-
-
-
@open_file(0, mode="rb")
def read_gexf(path, node_type=None, relabel=False, version="1.2draft"):
    """Read a graph in GEXF format from ``path``.

    "GEXF (Graph Exchange XML Format) is a language for describing
    complex networks structures, their associated data and dynamics" [1]_.

    Parameters
    ----------
    path : file or string
        File or file name to read. File names ending in ``.gz`` or
        ``.bz2`` will be decompressed.
    node_type : Python type (default: None)
        Convert node ids to this type if not None.
    relabel : bool (default: False)
        If True relabel the nodes to use the GEXF node "label" attribute
        instead of the node "id" attribute as the EasyGraph node label.
    version : string (default: 1.2draft)
        Version of the GEXF file format (see http://gexf.net/schema.html).
        Supported values: "1.1draft", "1.2draft".

    Returns
    -------
    graph : EasyGraph graph
        A Graph or DiGraph if no parallel edges are found, otherwise a
        MultiGraph or MultiDiGraph.

    Notes
    -----
    This implementation does not support mixed graphs (directed and
    undirected edges together).

    References
    ----------
    .. [1] GEXF File Format, http://gexf.net/
    """
    reader = GEXFReader(node_type=node_type, version=version)
    G = reader(path)
    return relabel_gexf_graph(G) if relabel else G
-
-
class GEXF:
    """Shared constants and helpers for the GEXF reader and writer classes.

    Holds the per-version XML namespace/schema tables, the mapping between
    Python types and GEXF attribute type names, and the boolean-string
    conversion table.
    """

    # Per-version namespace and schema-location constants, keyed by the
    # version strings accepted by set_version().
    versions = {}
    d = {
        "NS_GEXF": "http://www.gexf.net/1.1draft",
        "NS_VIZ": "http://www.gexf.net/1.1draft/viz",
        "NS_XSI": "http://www.w3.org/2001/XMLSchema-instance",
        "SCHEMALOCATION": " ".join(
            ["http://www.gexf.net/1.1draft", "http://www.gexf.net/1.1draft/gexf.xsd"]
        ),
        "VERSION": "1.1",
    }
    versions["1.1draft"] = d
    d = {
        "NS_GEXF": "http://www.gexf.net/1.2draft",
        "NS_VIZ": "http://www.gexf.net/1.2draft/viz",
        "NS_XSI": "http://www.w3.org/2001/XMLSchema-instance",
        "SCHEMALOCATION": " ".join(
            ["http://www.gexf.net/1.2draft", "http://www.gexf.net/1.2draft/gexf.xsd"]
        ),
        "VERSION": "1.2",
    }
    versions["1.2draft"] = d

    def construct_types(self):
        """Build ``self.xml_type`` (Python type -> GEXF type name) and
        ``self.python_type`` (GEXF type name -> Python type).

        For duplicate keys the last entry wins, so the ordering below is
        significant: numpy entries are prepended so that plain Python types
        take precedence when reading.
        """
        types = [
            (int, "integer"),
            (float, "float"),
            (float, "double"),
            (bool, "boolean"),
            (list, "string"),
            (dict, "string"),
            (int, "long"),
            (str, "liststring"),
            (str, "anyURI"),
            (str, "string"),
        ]

        # These additions to types allow writing numpy types
        try:
            import numpy as np
        except ImportError:
            pass
        else:
            # prepend so that python types are created upon read (last entry wins)
            # BUGFIX: np.float_ was removed in NumPy 2.0 (it was an alias of
            # np.float64 in 1.x), so referencing it crashed construct_types()
            # under NumPy >= 2. Only aliases present in both 1.x and 2.x are
            # listed here; the mappings produced are unchanged on NumPy 1.x.
            types = [
                (np.float64, "float"),
                (np.float32, "float"),
                (np.float16, "float"),
                (np.int_, "int"),
                (np.int8, "int"),
                (np.int16, "int"),
                (np.int32, "int"),
                (np.int64, "int"),
                (np.uint8, "int"),
                (np.uint16, "int"),
                (np.uint32, "int"),
                (np.uint64, "int"),
                (np.intc, "int"),
                (np.intp, "int"),
            ] + types

        self.xml_type = dict(types)
        self.python_type = dict(reversed(a) for a in types)

    # Accepted spellings of XML booleans, per
    # http://www.w3.org/TR/xmlschema-2/#boolean
    convert_bool = {
        "true": True,
        "false": False,
        "True": True,
        "False": False,
        "0": False,
        0: False,
        "1": True,
        1: True,
    }

    def set_version(self, version):
        """Load the namespace constants for ``version`` onto the instance.

        Raises
        ------
        AssertionError
            If ``version`` is not a known GEXF version.
        """
        d = self.versions.get(version)
        if d is None:
            raise AssertionError(f"Unknown GEXF version {version}.")
        self.NS_GEXF = d["NS_GEXF"]
        self.NS_VIZ = d["NS_VIZ"]
        self.NS_XSI = d["NS_XSI"]
        self.SCHEMALOCATION = d["SCHEMALOCATION"]
        self.VERSION = d["VERSION"]
        self.version = version
-
-
class GEXFWriter(GEXF):
    """Serialize an EasyGraph graph to GEXF XML.

    Not normally used directly; see the ``write_gexf()`` and
    ``generate_gexf()`` functions.
    """

    def __init__(
        self, graph=None, encoding="utf-8", prettyprint=True, version="1.2draft"
    ):
        self.construct_types()
        self.prettyprint = prettyprint
        self.encoding = encoding
        self.set_version(version)
        self.xml = Element(
            "gexf",
            {
                "xmlns": self.NS_GEXF,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
                "version": self.VERSION,
            },
        )

        # <meta> is a non-graph element; lastmodifieddate is written as an
        # attribute rather than a child tag.
        meta_element = Element("meta")
        SubElement(meta_element, "creator").text = "EasyGraph"
        meta_element.set("lastmodifieddate", time.strftime("%Y-%m-%d"))
        self.xml.append(meta_element)

        register_namespace("viz", self.NS_VIZ)

        # Counters handing out fresh edge and attribute identifiers.
        self.edge_id = itertools.count()
        self.attr_id = itertools.count()
        self.all_edge_ids = set()
        # Declared-attribute registry: [node|edge][dynamic|static][title] -> id
        self.attr = {
            "node": {"dynamic": {}, "static": {}},
            "edge": {"dynamic": {}, "static": {}},
        }

        if graph is not None:
            self.add_graph(graph)

    def __str__(self):
        if self.prettyprint:
            self.indent(self.xml)
        return tostring(self.xml).decode(self.encoding)

    def add_graph(self, G):
        """Append a <graph> element for G to the document."""
        # First pass: record every explicitly supplied edge id so generated
        # ids never collide with them.
        for u, v, dd in G.edges:
            eid = dd.get("id")
            if eid is not None:
                self.all_edge_ids.add(str(eid))
        mode = "dynamic" if G.graph.get("mode") == "dynamic" else "static"
        default = "directed" if G.is_directed() else "undirected"
        name = G.graph.get("name", "")
        graph_element = Element(
            "graph", defaultedgetype=default, mode=mode, name=name
        )
        self.graph_element = graph_element
        self.add_nodes(G, graph_element)
        self.add_edges(G, graph_element)
        self.xml.append(graph_element)

    def add_nodes(self, G, graph_element):
        """Append a <nodes> element describing every node of G."""
        nodes_element = Element("nodes")
        for node, data in G.nodes.items():
            node_data = data.copy()
            kw = {"id": str(node_data.pop("id", node))}
            kw["label"] = str(node_data.pop("label", node))
            if "pid" in node_data:
                kw["pid"] = str(node_data.pop("pid"))
            # start/end stamps flip the graph into dynamic mode.
            for when in ("start", "end"):
                if when in node_data:
                    stamp = node_data.pop(when)
                    kw[when] = str(stamp)
                    self.alter_graph_mode_timeformat(stamp)
            node_element = Element("node", **kw)
            default = G.graph.get("node_default", {})
            node_data = self.add_parents(node_element, node_data)
            # GEXF 1.1 used <slices>; later versions use <spells>.
            if self.VERSION == "1.1":
                node_data = self.add_slices(node_element, node_data)
            else:
                node_data = self.add_spells(node_element, node_data)
            node_data = self.add_viz(node_element, node_data)
            node_data = self.add_attributes("node", node_element, node_data, default)
            nodes_element.append(node_element)
        graph_element.append(nodes_element)

    def get_attr_id(self, title, attr_type, edge_or_node, default, mode):
        """Return the declared id for attribute *title*, declaring it in the
        matching <attributes> block the first time it is seen."""
        try:
            return self.attr[edge_or_node][mode][title]
        except KeyError:
            new_id = str(next(self.attr_id))
            self.attr[edge_or_node][mode][title] = new_id
            attribute = Element(
                "attribute", **{"id": new_id, "title": title, "type": attr_type}
            )
            # Record a default value for this attribute if one was supplied.
            default_value = default.get(title)
            if default_value is not None:
                default_element = Element("default")
                default_element.text = str(default_value)
                attribute.append(default_element)
            # Find the existing <attributes> block with this class and mode,
            # creating one at the top of <graph> if absent.
            attributes_element = None
            for candidate in self.graph_element.findall("attributes"):
                if (
                    candidate.get("class") == edge_or_node
                    and candidate.get("mode", "static") == mode
                ):
                    attributes_element = candidate
            if attributes_element is None:
                attributes_element = Element(
                    "attributes", **{"mode": mode, "class": edge_or_node}
                )
                self.graph_element.insert(0, attributes_element)
            attributes_element.append(attribute)
            return new_id

    def add_edges(self, G, graph_element):
        """Append an <edges> element describing every edge of G."""

        def edge_key_data(G):
            # Yield (u, v, edge id, data), inventing ids that avoid any
            # explicitly supplied ones collected in all_edge_ids.
            if G.is_multigraph():
                for u, v, key, data in G.edges:
                    edge_data = data.copy()
                    edge_data.update(key=key)
                    edge_id = edge_data.pop("id", None)
                    if edge_id is None:
                        edge_id = next(self.edge_id)
                        while str(edge_id) in self.all_edge_ids:
                            edge_id = next(self.edge_id)
                        self.all_edge_ids.add(str(edge_id))
                    yield u, v, edge_id, edge_data
            else:
                for u, v, data in G.edges:
                    edge_data = data.copy()
                    edge_id = edge_data.pop("id", None)
                    if edge_id is None:
                        edge_id = next(self.edge_id)
                        while str(edge_id) in self.all_edge_ids:
                            edge_id = next(self.edge_id)
                        self.all_edge_ids.add(str(edge_id))
                    yield u, v, edge_id, edge_data

        edges_element = Element("edges")
        for u, v, key, edge_data in edge_key_data(G):
            kw = {"id": str(key)}
            # Well-known edge fields become XML attributes, in this order.
            for field in ("label", "weight", "type"):
                if field in edge_data:
                    kw[field] = str(edge_data.pop(field))
            for when in ("start", "end"):
                if when in edge_data:
                    stamp = edge_data.pop(when)
                    kw[when] = str(stamp)
                    self.alter_graph_mode_timeformat(stamp)
            source_id = str(G.nodes[u].get("id", u))
            target_id = str(G.nodes[v].get("id", v))
            edge_element = Element("edge", source=source_id, target=target_id, **kw)
            default = G.graph.get("edge_default", {})
            if self.VERSION == "1.1":
                edge_data = self.add_slices(edge_element, edge_data)
            else:
                edge_data = self.add_spells(edge_element, edge_data)
            edge_data = self.add_viz(edge_element, edge_data)
            edge_data = self.add_attributes("edge", edge_element, edge_data, default)
            edges_element.append(edge_element)
        graph_element.append(edges_element)

    def add_attributes(self, node_or_edge, xml_obj, data, default):
        """Append an <attvalues> block holding every entry of *data*."""
        if len(data) == 0:
            return data
        attvalues = Element("attvalues")
        mode = "static"

        def respell_float(e, val_type):
            # GEXF spells the IEEE specials as INF / -INF / NaN.
            if val_type == float:
                special = {"inf": "INF", "nan": "NaN", "-inf": "-INF"}
                raw = e.attrib["value"]
                if raw in special:
                    e.attrib["value"] = special[raw]

        for k, v in data.items():
            # Rename the generic multigraph key to avoid any name conflict.
            if k == "key":
                k = "easygraph_key"
            val_type = type(v)
            if val_type not in self.xml_type:
                raise TypeError(f"attribute value type is not allowed: {val_type}")
            if isinstance(v, list):
                # Dynamic data: v is a list of (value, start, end) triples.
                for val, start, end in v:
                    val_type = type(val)
                    if start is not None or end is not None:
                        mode = "dynamic"
                        self.alter_graph_mode_timeformat(start)
                        self.alter_graph_mode_timeformat(end)
                        break
                attr_id = self.get_attr_id(
                    str(k), self.xml_type[val_type], node_or_edge, default, mode
                )
                for val, start, end in v:
                    e = Element("attvalue")
                    e.attrib["for"] = attr_id
                    e.attrib["value"] = str(val)
                    respell_float(e, val_type)
                    if start is not None:
                        e.attrib["start"] = str(start)
                    if end is not None:
                        e.attrib["end"] = str(end)
                    attvalues.append(e)
            else:
                # Static data.
                mode = "static"
                attr_id = self.get_attr_id(
                    str(k), self.xml_type[val_type], node_or_edge, default, mode
                )
                e = Element("attvalue")
                e.attrib["for"] = attr_id
                e.attrib["value"] = str(v).lower() if isinstance(v, bool) else str(v)
                respell_float(e, val_type)
                attvalues.append(e)
        xml_obj.append(attvalues)
        return data

    def add_viz(self, element, node_data):
        """Turn a 'viz' dict in node_data into viz-namespace children."""
        viz = node_data.pop("viz", False)
        if viz:
            color = viz.get("color")
            if color is not None:
                rgba = {
                    "r": str(color.get("r")),
                    "g": str(color.get("g")),
                    "b": str(color.get("b")),
                }
                if self.VERSION != "1.1":
                    # GEXF 1.2 adds an alpha channel.
                    rgba["a"] = str(color.get("a"))
                element.append(Element(f"{{{self.NS_VIZ}}}color", **rgba))

            size = viz.get("size")
            if size is not None:
                element.append(Element(f"{{{self.NS_VIZ}}}size", value=str(size)))

            thickness = viz.get("thickness")
            if thickness is not None:
                element.append(
                    Element(f"{{{self.NS_VIZ}}}thickness", value=str(thickness))
                )

            shape = viz.get("shape")
            if shape is not None:
                # URLs denote image shapes.
                if shape.startswith("http"):
                    e = Element(
                        f"{{{self.NS_VIZ}}}shape", value="image", uri=str(shape)
                    )
                else:
                    e = Element(f"{{{self.NS_VIZ}}}shape", value=str(shape))
                element.append(e)

            position = viz.get("position")
            if position is not None:
                element.append(
                    Element(
                        f"{{{self.NS_VIZ}}}position",
                        x=str(position.get("x")),
                        y=str(position.get("y")),
                        z=str(position.get("z")),
                    )
                )
        return node_data

    def add_parents(self, node_element, node_data):
        """Turn a 'parents' list in node_data into a <parents> child."""
        parents = node_data.pop("parents", False)
        if parents:
            parents_element = Element("parents")
            for p in parents:
                e = Element("parent")
                e.attrib["for"] = str(p)
                parents_element.append(e)
            node_element.append(parents_element)
        return node_data

    def add_slices(self, node_or_edge_element, node_or_edge_data):
        """Turn a 'slices' list of (start, end) pairs into a <slices> child
        (GEXF 1.1 only)."""
        slices = node_or_edge_data.pop("slices", False)
        if slices:
            slices_element = Element("slices")
            for start, end in slices:
                slices_element.append(
                    Element("slice", start=str(start), end=str(end))
                )
            node_or_edge_element.append(slices_element)
        return node_or_edge_data

    def add_spells(self, node_or_edge_element, node_or_edge_data):
        """Turn a 'spells' list of (start, end) pairs into a <spells> child
        (GEXF 1.2), switching the graph to dynamic mode as needed."""
        spells = node_or_edge_data.pop("spells", False)
        if spells:
            spells_element = Element("spells")
            for start, end in spells:
                e = Element("spell")
                if start is not None:
                    e.attrib["start"] = str(start)
                    self.alter_graph_mode_timeformat(start)
                if end is not None:
                    e.attrib["end"] = str(end)
                    self.alter_graph_mode_timeformat(end)
                spells_element.append(e)
            node_or_edge_element.append(spells_element)
        return node_or_edge_data

    def alter_graph_mode_timeformat(self, start_or_end):
        """Switch the graph to dynamic mode on the first start/end stamp,
        recording the timeformat implied by the stamp's Python type."""
        if self.graph_element.get("mode") == "static":
            if start_or_end is not None:
                if isinstance(start_or_end, str):
                    timeformat = "date"
                elif isinstance(start_or_end, float):
                    timeformat = "double"
                elif isinstance(start_or_end, int):
                    timeformat = "long"
                else:
                    raise AssertionError(
                        "timeformat should be of the type int, float or str"
                    )
                self.graph_element.set("timeformat", timeformat)
                self.graph_element.set("mode", "dynamic")

    def write(self, fh):
        """Write the accumulated document to the file handle *fh*."""
        if self.prettyprint:
            self.indent(self.xml)
        document = ElementTree(self.xml)
        document.write(fh, encoding=self.encoding, xml_declaration=True)

    def indent(self, elem, level=0):
        """In-place pretty printer: insert newline/indent text nodes."""
        i = "\n" + "  " * level
        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = i + "  "
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
            # NOTE: the loop deliberately shadows ``elem`` so that the
            # trailing checks below apply to the LAST child element.
            for elem in elem:
                self.indent(elem, level + 1)
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
        else:
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = i
-
-classGEXFReader(GEXF):
- # Class to read GEXF format files
- # use read_gexf() function
- def__init__(self,node_type=None,version="1.2draft"):
- self.construct_types()
- self.node_type=node_type
- # assume simple graph and test for multigraph on read
- self.simple_graph=True
- self.set_version(version)
-
- def__call__(self,stream):
- self.xml=ElementTree(file=stream)
- g=self.xml.find(f"{{{self.NS_GEXF}}}graph")
- ifgisnotNone:
- returnself.make_graph(g)
- # try all the versions
- forversioninself.versions:
- self.set_version(version)
- g=self.xml.find(f"{{{self.NS_GEXF}}}graph")
- ifgisnotNone:
- returnself.make_graph(g)
- raiseEasyGraphError("No <graph> element in GEXF file.")
-
- defmake_graph(self,graph_xml):
- edgedefault=graph_xml.get("defaultedgetype",None)
- ifedgedefault=="directed":
- G=eg.MultiDiGraph()
- else:
- G=eg.MultiGraph()
-
- # graph attributes
- graph_name=graph_xml.get("name","")
- ifgraph_name!="":
- G.graph["name"]=graph_name
- graph_start=graph_xml.get("start")
- ifgraph_startisnotNone:
- G.graph["start"]=graph_start
- graph_end=graph_xml.get("end")
- ifgraph_endisnotNone:
- G.graph["end"]=graph_end
- graph_mode=graph_xml.get("mode","")
- ifgraph_mode=="dynamic":
- G.graph["mode"]="dynamic"
- else:
- G.graph["mode"]="static"
-
- # timeformat
- self.timeformat=graph_xml.get("timeformat")
- ifself.timeformat=="date":
- self.timeformat="string"
-
- # node and edge attributes
- attributes_elements=graph_xml.findall(f"{{{self.NS_GEXF}}}attributes")
- # dictionaries to hold attributes and attribute defaults
- node_attr={}
- node_default={}
- edge_attr={}
- edge_default={}
- forainattributes_elements:
- attr_class=a.get("class")
- ifattr_class=="node":
- na,nd=self.find_gexf_attributes(a)
- node_attr.update(na)
- node_default.update(nd)
- G.graph["node_default"]=node_default
- elifattr_class=="edge":
- ea,ed=self.find_gexf_attributes(a)
- edge_attr.update(ea)
- edge_default.update(ed)
- G.graph["edge_default"]=edge_default
- else:
- raise# unknown attribute class
-
- # Hack to handle Gephi0.7beta bug
- # add weight attribute
- ea={"weight":{"type":"double","mode":"static","title":"weight"}}
- ed={}
- edge_attr.update(ea)
- edge_default.update(ed)
- G.graph["edge_default"]=edge_default
-
- # add nodes
- nodes_element=graph_xml.find(f"{{{self.NS_GEXF}}}nodes")
- ifnodes_elementisnotNone:
- fornode_xmlinnodes_element.findall(f"{{{self.NS_GEXF}}}node"):
- self.add_node(G,node_xml,node_attr)
-
- # add edges
- edges_element=graph_xml.find(f"{{{self.NS_GEXF}}}edges")
- ifedges_elementisnotNone:
- foredge_xmlinedges_element.findall(f"{{{self.NS_GEXF}}}edge"):
- self.add_edge(G,edge_xml,edge_attr)
-
- # switch to Graph or DiGraph if no parallel edges were found.
- ifself.simple_graph:
- ifG.is_directed():
- G=eg.DiGraph(G)
- else:
- G=eg.Graph(G)
- returnG
-
- defadd_node(self,G,node_xml,node_attr,node_pid=None):
- # add a single node with attributes to the graph
-
- # get attributes and subattributues for node
- data=self.decode_attr_elements(node_attr,node_xml)
- data=self.add_parents(data,node_xml)# add any parents
- ifself.VERSION=="1.1":
- data=self.add_slices(data,node_xml)# add slices
- else:
- data=self.add_spells(data,node_xml)# add spells
- data=self.add_viz(data,node_xml)# add viz
- data=self.add_start_end(data,node_xml)# add start/end
-
- # find the node id and cast it to the appropriate type
- node_id=node_xml.get("id")
- ifself.node_typeisnotNone:
- node_id=self.node_type(node_id)
-
- # every node should have a label
- node_label=node_xml.get("label")
- data["label"]=node_label
-
- # parent node id
- node_pid=node_xml.get("pid",node_pid)
- ifnode_pidisnotNone:
- data["pid"]=node_pid
-
- # check for subnodes, recursive
- subnodes=node_xml.find(f"{{{self.NS_GEXF}}}nodes")
- ifsubnodesisnotNone:
- fornode_xmlinsubnodes.findall(f"{{{self.NS_GEXF}}}node"):
- self.add_node(G,node_xml,node_attr,node_pid=node_id)
-
- G.add_node(node_id,**data)
-
- defadd_start_end(self,data,xml):
- # start and end times
- ttype=self.timeformat
- node_start=xml.get("start")
- ifnode_startisnotNone:
- data["start"]=self.python_type[ttype](node_start)
- node_end=xml.get("end")
- ifnode_endisnotNone:
- data["end"]=self.python_type[ttype](node_end)
- returndata
-
def add_viz(self, data, node_xml):
    """Decode the GEXF ``viz`` namespace sub-elements of a node.

    Collects color, size, thickness, shape and position (when present)
    into ``data["viz"]`` and returns *data*.
    """
    viz = {}

    color = node_xml.find(f"{{{self.NS_VIZ}}}color")
    if color is not None:
        rgb = {
            "r": int(color.get("r")),
            "g": int(color.get("g")),
            "b": int(color.get("b")),
        }
        if self.VERSION != "1.1":
            # the alpha channel only appears after GEXF 1.1
            rgb["a"] = float(color.get("a", 1))
        viz["color"] = rgb

    size = node_xml.find(f"{{{self.NS_VIZ}}}size")
    if size is not None:
        viz["size"] = float(size.get("value"))

    thickness = node_xml.find(f"{{{self.NS_VIZ}}}thickness")
    if thickness is not None:
        viz["thickness"] = float(thickness.get("value"))

    shape = node_xml.find(f"{{{self.NS_VIZ}}}shape")
    if shape is not None:
        kind = shape.get("shape")
        # image shapes carry the image location in the 'uri' attribute
        viz["shape"] = shape.get("uri") if kind == "image" else kind

    position = node_xml.find(f"{{{self.NS_VIZ}}}position")
    if position is not None:
        viz["position"] = {
            axis: float(position.get(axis, 0)) for axis in ("x", "y", "z")
        }

    if viz:
        data["viz"] = viz
    return data
-
def add_parents(self, data, node_xml):
    """Collect the ``<parent for="...">`` references of a node.

    When a ``<parents>`` element exists, stores the list of referenced
    ids in ``data["parents"]``.  Returns *data*.
    """
    parents_element = node_xml.find(f"{{{self.NS_GEXF}}}parents")
    if parents_element is not None:
        data["parents"] = [
            p.get("for")
            for p in parents_element.findall(f"{{{self.NS_GEXF}}}parent")
        ]
    return data
-
def add_slices(self, data, node_or_edge_xml):
    """Collect GEXF 1.1 ``<slice>`` intervals of a node or edge.

    Stores ``(start, end)`` pairs (raw strings, not converted) in
    ``data["slices"]`` when a ``<slices>`` element exists.  Returns *data*.
    """
    slices_element = node_or_edge_xml.find(f"{{{self.NS_GEXF}}}slices")
    if slices_element is not None:
        data["slices"] = [
            (s.get("start"), s.get("end"))
            for s in slices_element.findall(f"{{{self.NS_GEXF}}}slice")
        ]
    return data
-
def add_spells(self, data, node_or_edge_xml):
    """Collect ``<spell>`` intervals of a node or edge.

    Unlike slices, spell boundaries are cast with the converter for the
    declared timeformat.  Stores ``(start, end)`` tuples in
    ``data["spells"]`` when a ``<spells>`` element exists.  Returns *data*.
    """
    spells_element = node_or_edge_xml.find(f"{{{self.NS_GEXF}}}spells")
    if spells_element is not None:
        caster = self.python_type[self.timeformat]
        data["spells"] = [
            (caster(s.get("start")), caster(s.get("end")))
            for s in spells_element.findall(f"{{{self.NS_GEXF}}}spell")
        ]
    return data
-
def add_edge(self, G, edge_element, edge_attr):
    """Decode one GEXF ``<edge>`` element and insert it into ``G``.

    Raises
    ------
    EasyGraphError
        If the edge direction contradicts the directedness of ``G``.
    """
    # Mixed directed/undirected edges cannot live in a single graph.
    edge_direction = edge_element.get("type")
    if G.is_directed() and edge_direction == "undirected":
        raise EasyGraphError("Undirected edge found in directed graph.")
    if (not G.is_directed()) and edge_direction == "directed":
        raise EasyGraphError("Directed edge found in undirected graph.")

    # Endpoints, recast to the requested node type when one was given.
    source = edge_element.get("source")
    target = edge_element.get("target")
    if self.node_type is not None:
        source = self.node_type(source)
        target = self.node_type(target)

    data = self.decode_attr_elements(edge_attr, edge_element)
    data = self.add_start_end(data, edge_element)

    # dynamics: GEXF 1.1 uses slices, later versions use spells
    if self.VERSION == "1.1":
        data = self.add_slices(data, edge_element)
    else:
        data = self.add_spells(data, edge_element)

    # GEXF stores edge ids as an attribute; EasyGraph uses them as keys
    # in multigraphs unless an explicit 'easygraph_key' overrides them.
    edge_id = edge_element.get("id")
    if edge_id is not None:
        data["id"] = edge_id
    multigraph_key = data.pop("easygraph_key", None)
    if multigraph_key is not None:
        edge_id = multigraph_key

    weight = edge_element.get("weight")
    if weight is not None:
        data["weight"] = float(weight)

    edge_label = edge_element.get("label")
    if edge_label is not None:
        data["label"] = edge_label

    if G.has_edge(source, target):
        # seen this (source, target) pair before - this is a multigraph
        self.simple_graph = False
    G.add_edge(source, target, key=edge_id, **data)
    if edge_direction == "mutual":
        G.add_edge(target, source, key=edge_id, **data)
-
def decode_attr_elements(self, gexf_keys, obj_xml):
    """Decode the ``<attvalues>`` block of a node/edge using key definitions.

    Parameters
    ----------
    gexf_keys : dict
        Attribute id -> ``{"title", "type", "mode"}``, as produced by
        ``find_gexf_attributes``.
    obj_xml : Element
        The ``<node>`` or ``<edge>`` element being decoded.

    Returns
    -------
    dict
        Attribute title -> value.  Dynamic attributes map to a list of
        ``(value, start, end)`` three-tuples.

    Raises
    ------
    EasyGraphError
        If an ``<attvalue>`` refers to an attribute id that was never
        declared.
    """
    attr = {}
    # look for the outer '<attvalues>' element
    attr_element = obj_xml.find(f"{{{self.NS_GEXF}}}attvalues")
    if attr_element is not None:
        # loop over <attvalue> elements
        for a in attr_element.findall(f"{{{self.NS_GEXF}}}attvalue"):
            key = a.get("for")  # 'for' is required
            try:  # should be in our gexf_keys dictionary
                title = gexf_keys[key]["title"]
            except KeyError as err:
                # consistency fix: raise the module-level EasyGraphError,
                # as add_edge does, instead of eg.EasyGraphError
                raise EasyGraphError(f"No attribute defined for={key}.") from err
            atype = gexf_keys[key]["type"]
            value = a.get("value")
            if atype == "boolean":
                value = self.convert_bool[value]
            else:
                value = self.python_type[atype](value)
            if gexf_keys[key]["mode"] == "dynamic":
                # for dynamic graphs use list of three-tuples
                # [(value1, start1, end1), (value2, start2, end2), etc]
                ttype = self.timeformat
                start = self.python_type[ttype](a.get("start"))
                end = self.python_type[ttype](a.get("end"))
                if title in attr:
                    attr[title].append((value, start, end))
                else:
                    attr[title] = [(value, start, end)]
            else:
                # for static graphs just assign the value
                attr[title] = value
    return attr
-
def find_gexf_attributes(self, attributes_element):
    """Extract attribute declarations and their defaults from ``<attributes>``.

    Returns
    -------
    (attrs, defaults) : tuple of dict
        ``attrs`` maps attribute id -> ``{"title", "type", "mode"}``;
        ``defaults`` maps attribute title -> converted default value.
    """
    attrs = {}
    defaults = {}
    mode = attributes_element.get("mode")
    for declared in attributes_element.findall(f"{{{self.NS_GEXF}}}attribute"):
        attr_id = declared.get("id")
        atype = declared.get("type")
        title = declared.get("title")
        attrs[attr_id] = {"title": title, "type": atype, "mode": mode}
        # a 'default' sub-element supplies a per-attribute default value
        default = declared.find(f"{{{self.NS_GEXF}}}default")
        if default is not None:
            if atype == "boolean":
                defaults[title] = self.convert_bool[default.text]
            else:
                defaults[title] = self.python_type[atype](default.text)
    return attrs, defaults
-
-
-
def relabel_gexf_graph(G):
    """Relabel graph using the "label" node attribute for node names.

    Parameters
    ----------
    G : graph
        An EasyGraph graph read from GEXF data.

    Returns
    -------
    H : graph
        An EasyGraph graph with relabeled nodes.

    Raises
    ------
    EasyGraphError
        If node labels are missing or not unique while relabel=True.

    Notes
    -----
    Besides renaming the nodes, the original node name is preserved in
    the "id" attribute and the GEXF-specific attributes "pid" and
    "parents" are rewritten in terms of the new labels.
    """
    # build mapping of node labels, do some error checking
    try:
        mapping = [(u, G.nodes[u]["label"]) for u in G]
    except KeyError as err:
        raise EasyGraphError(
            "Failed to relabel nodes: missing node labels found. Use relabel=False."
        ) from err
    # Bug fix: the previous `x, y = zip(*mapping)` raised an unrelated
    # ValueError for an empty graph; collect the labels directly instead.
    labels = [label for _, label in mapping]
    if len(set(labels)) != len(G):
        raise EasyGraphError(
            "Failed to relabel nodes: duplicate node labels found. Use relabel=False."
        )
    mapping = dict(mapping)
    H = eg.relabel_nodes(G, mapping)
    # rewrite the label-dependent attributes on the relabeled copy
    for n in G:
        m = mapping[n]
        H.nodes[m]["id"] = n
        H.nodes[m].pop("label")
        if "pid" in H.nodes[m]:
            H.nodes[m]["pid"] = mapping[G.nodes[n]["pid"]]
        if "parents" in H.nodes[m]:
            H.nodes[m]["parents"] = [mapping[p] for p in G.nodes[n]["parents"]]
    return H
-"""
-Read graphs in GML format.
-
-"GML, the Graph Modelling Language, is our proposal for a portable
-file format for graphs. GML's key features are portability, simple
-syntax, extensibility and flexibility. A GML file consists of
-hierarchical key-value lists. Graphs can be annotated with arbitrary
-data structures. The idea for a common file format was born at the
-GD'95; this proposal is the outcome of many discussions. GML is the
-standard file format in the Graphlet graph editor system. It has been
-overtaken and adapted by several other systems for drawing graphs."
-
-GML files are stored using a 7-bit ASCII encoding with any extended
-ASCII characters (iso8859-1) appearing as HTML character entities.
-You will need to give some thought into how the exported data should
-interact with different languages and even different Python versions.
-Re-importing from gml is also a concern.
-
-Without specifying a `stringizer`/`destringizer`, the code is capable of
-writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
-specification. For writing other data types, and for reading data other
-than `str` you need to explicitly supply a `stringizer`/`destringizer`.
-
-For additional documentation on the GML file format, please see the
-`GML website <https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297&L=1>`_.
-
-Several example graphs in GML format may be found on Mark Newman's
-`Network data page <http://www-personal.umich.edu/~mejn/netdata/>`_.
-"""
-
-
-importhtml.entitiesashtmlentitydefs
-importre
-importwarnings
-
-fromastimportliteral_eval
-fromcollectionsimportdefaultdict
-fromenumimportEnum
-fromioimportStringIO
-fromtypingimportAny
-fromtypingimportNamedTuple
-fromunicodedataimportcategory
-
-importeasygraphaseg
-
-fromeasygraph.utilsimportopen_file
-fromeasygraph.utils.exceptionimportEasyGraphError
-
-
-__all__=["read_gml","parse_gml","generate_gml","write_gml"]
-
-LIST_START_VALUE="_easygraph_list_start"
-
-
def escape(text):
    """Use XML character references to escape characters.

    Unprintable or non-ASCII characters, double quotes and ampersands are
    replaced by ``&#<codepoint>;`` references.  Non-string input is
    converted with ``str`` first.
    """

    def fixup(m):
        ch = m.group(0)
        return "&#" + str(ord(ch)) + ";"

    # Bug fix: the original coerced to str *after* re.sub, where non-str
    # input had already raised a TypeError, making that branch dead code.
    if not isinstance(text, str):
        text = str(text)
    return re.sub('[^ -~]|[&"]', fixup, text)
-
-
def unescape(text):
    """Replace XML character references with the referenced characters."""

    def fixup(m):
        ref = m.group(0)
        if ref[1] == "#":
            # numeric character reference, decimal or "&#x..." hexadecimal
            code = int(ref[3:-1], 16) if ref[2] == "x" else int(ref[2:-1])
        else:
            # named entity
            try:
                code = htmlentitydefs.name2codepoint[ref[1:-1]]
            except KeyError:
                return ref  # unknown entity: leave unchanged
        try:
            return chr(code)
        except (ValueError, OverflowError):
            return ref  # out-of-range code point: leave unchanged

    return re.sub("&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));", fixup, text)
-
-
def literal_destringizer(rep):
    """Convert a Python literal to the value it represents.

    Parameters
    ----------
    rep : string
        A Python literal.

    Returns
    -------
    value : object
        The value of the Python literal.

    Raises
    ------
    ValueError
        If `rep` is not a string or is not a valid Python literal.

    .. deprecated::
        literal_destringizer is deprecated and will be removed in 3.0.
    """
    warnings.warn(
        "literal_destringizer is deprecated and will be removed in 3.0.",
        DeprecationWarning,
    )
    if not isinstance(rep, str):
        raise ValueError(f"{rep!r} is not a string")
    try:
        return literal_eval(rep)
    except SyntaxError as err:
        raise ValueError(f"{rep!r} is not a valid Python literal") from err
-
-
class Pattern(Enum):
    """Encodes the index of each token-matching pattern in `tokenize`."""

    KEYS = 0
    REALS = 1
    INTS = 2
    STRINGS = 3
    DICT_START = 4
    DICT_END = 5
    COMMENT_WHITESPACE = 6
-
-
class Token(NamedTuple):
    """One lexical token produced by the GML tokenizer.

    ``line`` and ``position`` locate the token in the input — presumably
    for error reporting; the tokenizer itself is defined elsewhere.
    """

    category: Pattern
    value: Any
    line: int
    position: int
-
-
-
def parse_gml(lines, label="label", destringizer=None):
    """Parse GML graph from a string or iterable.

    Parameters
    ----------
    lines : string or iterable of strings
        Data in GML format.

    label : string, optional
        If not None, the parsed nodes will be renamed according to node
        attributes indicated by `label`. Default value: 'label'.

    destringizer : callable, optional
        A `destringizer` that recovers values stored as strings in GML. If it
        cannot convert a string to a value, a `ValueError` is raised. Default
        value : None.

    Returns
    -------
    G : EasyGraph graph
        The parsed graph.

    Raises
    ------
    EasyGraphError
        If the input cannot be parsed.

    See Also
    --------
    write_gml, read_gml

    Notes
    -----
    This stores nested GML attributes as dictionaries in the EasyGraph graph,
    node, and edge attribute structures.

    GML files are stored using a 7-bit ASCII encoding with any extended
    ASCII characters (iso8859-1) appearing as HTML character entities.
    Without specifying a `stringizer`/`destringizer`, the code is capable of
    writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
    specification. For writing other data types, and for reading data other
    than `str` you need to explicitly supply a `stringizer`/`destringizer`.

    See the module docstring :mod:`easygraph.readwrite.gml` for more details.
    """

    def decode_line(line):
        # Bug fix: bytes input used to be ASCII-*validated* only and then
        # passed through str(), which yields the repr "b'...'" instead of
        # the decoded text.  Decode it for real.
        if isinstance(line, bytes):
            try:
                line = line.decode("ascii")
            except UnicodeDecodeError as err:
                raise EasyGraphError("input is not ASCII-encoded") from err
        if not isinstance(line, str):
            line = str(line)
        return line

    def filter_lines(lines):
        if isinstance(lines, str):
            lines = decode_line(lines)
            lines = lines.splitlines()
            yield from lines
        else:
            for line in lines:
                line = decode_line(line)
                if line and line[-1] == "\n":
                    line = line[:-1]
                if line.find("\n") != -1:
                    raise EasyGraphError("input line contains newline")
                yield line

    G = parse_gml_lines(filter_lines(lines), label, destringizer)
    return G
def generate_gml(G, stringizer=None):
    r"""Generate a single entry of the graph `G` in GML format.

    Parameters
    ----------
    G : EasyGraph graph
        The graph to be converted to GML.

    stringizer : callable, optional
        A `stringizer` which converts non-int/non-float/non-dict values into
        strings. If it cannot convert a value into a string, it should raise a
        `ValueError` to indicate that. Default value: None.

    Returns
    -------
    lines: generator of strings
        Lines of GML data. Newlines are not appended.

    Raises
    ------
    EasyGraphError
        If `stringizer` cannot convert a value into a string, or the value to
        convert is not a string while `stringizer` is None.

    See Also
    --------
    literal_stringizer

    Notes
    -----
    Graph attributes named 'directed', 'multigraph', 'node' or 'edge', node
    attributes named 'id' or 'label', and edge attributes named 'source' or
    'target' (or 'key' if `G` is a multigraph) are ignored because these
    attribute names are used to encode the graph structure.

    GML files are stored using a 7-bit ASCII encoding with any extended
    ASCII characters (iso8859-1) appearing as HTML character entities.
    Without specifying a `stringizer`/`destringizer`, the code is capable of
    writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
    specification.

    Examples
    --------
    >>> G = eg.Graph()
    >>> G.add_node("1")
    >>> print("\n".join(eg.generate_gml(G)))
    graph [
      node [
        id 0
        label "1"
      ]
    ]
    """
    valid_keys = re.compile("^[A-Za-z][0-9A-Za-z_]*$")

    def stringize(key, value, ignored_keys, indent, in_list=False):
        if not isinstance(key, str):
            raise EasyGraphError(f"{key!r} is not a string")
        if not valid_keys.match(key):
            raise EasyGraphError(f"{key!r} is not a valid key")
        # NOTE: a second `if not isinstance(key, str): key = str(key)` check
        # that followed here was dead code (the first check above already
        # raised for non-string keys) and has been removed.
        if key not in ignored_keys:
            if isinstance(value, (int, bool)):
                if key == "label":
                    yield indent + key + ' "' + str(value) + '"'
                elif value is True:
                    # python bool is an instance of int
                    yield indent + key + " 1"
                elif value is False:
                    yield indent + key + " 0"
                # GML only supports signed 32-bit integers
                elif value < -(2**31) or value >= 2**31:
                    yield indent + key + ' "' + str(value) + '"'
                else:
                    yield indent + key + " " + str(value)
            elif isinstance(value, float):
                text = repr(value).upper()
                # GML matches INF to keys, so prepend + to INF. Use repr(float(*))
                # instead of string literal to future proof against changes to repr.
                if text == repr(float("inf")).upper():
                    text = "+" + text
                else:
                    # GML requires that a real literal contain a decimal point, but
                    # repr may not output a decimal point when the mantissa is
                    # integral and hence needs fixing.
                    epos = text.rfind("E")
                    if epos != -1 and text.find(".", 0, epos) == -1:
                        text = text[:epos] + "." + text[epos:]
                if key == "label":
                    yield indent + key + ' "' + text + '"'
                else:
                    yield indent + key + " " + text
            elif isinstance(value, dict):
                yield indent + key + " ["
                next_indent = indent + "  "
                for key, value in value.items():
                    yield from stringize(key, value, (), next_indent)
                yield indent + "]"
            elif (
                isinstance(value, (list, tuple))
                and key != "label"
                and value
                and not in_list
            ):
                if len(value) == 1:
                    # single-element lists are marked so they round-trip as lists
                    yield indent + key + " " + f'"{LIST_START_VALUE}"'
                for val in value:
                    yield from stringize(key, val, (), indent, True)
            else:
                if stringizer:
                    try:
                        value = stringizer(value)
                    except ValueError as err:
                        raise EasyGraphError(
                            f"{value!r} cannot be converted into a string"
                        ) from err
                if not isinstance(value, str):
                    raise EasyGraphError(f"{value!r} is not a string")
                yield indent + key + ' "' + escape(value) + '"'

    yield "graph ["

    # Output graph attributes
    multigraph = G.is_multigraph()
    if G.is_directed():
        yield "  directed 1"
    if multigraph:
        yield "  multigraph 1"
    ignored_keys = {"directed", "multigraph", "node", "edge"}
    for attr, value in G.graph.items():
        yield from stringize(attr, value, ignored_keys, "  ")

    # Output node data
    node_id = dict(zip(G, range(len(G))))
    ignored_keys = {"id", "label"}
    for node, attrs in G.nodes.items():
        yield "  node ["
        yield "    id " + str(node_id[node])
        yield from stringize("label", node, (), "    ")
        for attr, value in attrs.items():
            yield from stringize(attr, value, ignored_keys, "    ")
        yield "  ]"

    # Output edge data
    ignored_keys = {"source", "target"}
    kwargs = {"data": True}
    if multigraph:
        ignored_keys.add("key")
        kwargs["keys"] = True
    for e in G.edges:
        yield "  edge ["
        yield "    source " + str(node_id[e[0]])
        yield "    target " + str(node_id[e[1]])
        if multigraph:
            yield from stringize("key", e[2], (), "    ")
        for attr, value in e[-1].items():
            yield from stringize(attr, value, ignored_keys, "    ")
        yield "  ]"
    yield "]"
-
-
-
@open_file(0, mode="rb")
def read_gml(path, label="label", destringizer=None):
    """Read graph in GML format from `path`.

    Parameters
    ----------
    path : filename or filehandle
        The filename or filehandle to read from.

    label : string, optional
        If not None, the parsed nodes will be renamed according to node
        attributes indicated by `label`. Default value: 'label'.

    destringizer : callable, optional
        A `destringizer` that recovers values stored as strings in GML. If it
        cannot convert a string to a value, a `ValueError` is raised. Default
        value : None.

    Returns
    -------
    G : EasyGraph graph
        The parsed graph.

    Raises
    ------
    EasyGraphError
        If the input cannot be parsed.

    See Also
    --------
    write_gml, parse_gml
    literal_destringizer

    Notes
    -----
    GML files are stored using a 7-bit ASCII encoding with any extended
    ASCII characters (iso8859-1) appearing as HTML character entities.
    Without specifying a `stringizer`/`destringizer`, the code is capable of
    writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
    specification.

    See the module docstring :mod:`easygraph.readwrite.gml` for more details.

    Examples
    --------
    >>> G = eg.path_graph(4)
    >>> eg.write_gml(G, "test.gml")

    GML values are interpreted as strings by default:

    >>> H = eg.read_gml("test.gml")
    >>> H.nodes
    NodeView(('0', '1', '2', '3'))

    When a `destringizer` is provided, GML values are converted to the
    provided type.  For example, integer nodes can be recovered as:

    >>> J = eg.read_gml("test.gml", destringizer=int)
    >>> J.nodes
    NodeView((0, 1, 2, 3))
    """

    def filter_lines(lines):
        for line in lines:
            try:
                line = line.decode("ascii")
            except UnicodeDecodeError as err:
                raise EasyGraphError("input is not ASCII-encoded") from err
            if not isinstance(line, str):
                # Bug fix: this used to rebind the *lines* iterable
                # (`lines = str(lines)`) instead of the current line.
                line = str(line)
            if line and line[-1] == "\n":
                line = line[:-1]
            yield line

    G = parse_gml_lines(filter_lines(path), label, destringizer)
    return G
-
-
-
@open_file(1, mode="wb")
def write_gml(G, path, stringizer=None):
    """Write a graph `G` in GML format to the file or file handle `path`.

    Parameters
    ----------
    G : EasyGraph graph
        The graph to be converted to GML.

    path : filename or filehandle
        The filename or filehandle to write. Files whose names end with .gz
        or .bz2 will be compressed.

    stringizer : callable, optional
        A `stringizer` which converts non-int/non-float/non-dict values into
        strings. If it cannot convert a value into a string, it should raise
        a `ValueError` to indicate that. Default value: None.

    Raises
    ------
    EasyGraphError
        If `stringizer` cannot convert a value into a string, or the value
        to convert is not a string while `stringizer` is None.

    See Also
    --------
    read_gml, generate_gml
    literal_stringizer

    Notes
    -----
    Graph attributes named 'directed', 'multigraph', 'node' or 'edge', node
    attributes named 'id' or 'label', and edge attributes named 'source' or
    'target' (or 'key' if `G` is a multigraph) are ignored because these
    attribute names are used to encode the graph structure.

    While non-standard GML can be read, only standard GML is written; in
    particular, underscores are not allowed in attribute names.

    Examples
    --------
    >>> G = eg.path_graph(4)
    >>> eg.write_gml(G, "test.gml")

    Filenames ending in .gz or .bz2 will be compressed.

    >>> eg.write_gml(G, "test.gml.gz")
    """
    # generate_gml yields lines without newlines; GML is 7-bit ASCII
    path.writelines(
        (line + "\n").encode("ascii") for line in generate_gml(G, stringizer)
    )
-"""
-*******
-GraphML
-*******
-Read and write graphs in GraphML format.
-
-.. warning::
-
- This parser uses the standard xml library present in Python, which is
- insecure - see :doc:`library/xml` for additional information.
- Only parse GraphML files you trust.
-
-This implementation does not support mixed graphs (directed and undirected
-edges together), hyperedges, nested graphs, or ports.
-
-"GraphML is a comprehensive and easy-to-use file format for graphs. It
-consists of a language core to describe the structural properties of a
-graph and a flexible extension mechanism to add application-specific
-data. Its main features include support of
-
- * directed, undirected, and mixed graphs,
- * hypergraphs,
- * hierarchical graphs,
- * graphical representations,
- * references to external data,
- * application-specific attribute data, and
- * light-weight parsers.
-
-Unlike many other file formats for graphs, GraphML does not use a
-custom syntax. Instead, it is based on XML and hence ideally suited as
-a common denominator for all kinds of services generating, archiving,
-or processing graphs."
-
-http://graphml.graphdrawing.org/
-
-Format
-------
-GraphML is an XML format. See
-http://graphml.graphdrawing.org/specification.html for the specification and
-http://graphml.graphdrawing.org/primer/graphml-primer.html
-for examples.
-"""
-
-importwarnings
-
-fromcollectionsimportdefaultdict
-
-importeasygraphaseg
-
-fromeasygraph.utilsimportopen_file
-fromeasygraph.utils.exceptionimportEasyGraphError
-
-
-__all__=[
- "write_graphml",
- "read_graphml",
- "generate_graphml",
- "write_graphml_xml",
- "write_graphml_lxml",
- "parse_graphml",
- "GraphMLWriter",
- "GraphMLReader",
-]
-
-
-
@open_file(1, mode="wb")
def write_graphml_xml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
    edge_id_from_attribute=None,
):
    """Write G in GraphML XML format to path.

    Parameters
    ----------
    G : graph
        An easygraph graph
    path : file or string
        File or filename to write.  Filenames ending in .gz or .bz2 will
        be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    infer_numeric_types : boolean
        Determine if numeric types should be generalized.  For example, if
        edges have both int and float 'weight' attributes, we infer in
        GraphML that both are floats.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.
    edge_id_from_attribute : dict key (optional)
        If provided, the graphml edge id is set by looking up the
        corresponding edge data attribute keyed by this parameter.  If
        `None` or the key does not exist in edge data, the edge id is set
        by the edge key if `G` is a MultiGraph, else left unset.

    Examples
    --------
    >>> G = eg.path_graph(4)
    >>> eg.write_graphml(G, "test.graphml")

    Notes
    -----
    This implementation does not support mixed graphs (directed and
    undirected edges together), hyperedges, nested graphs, or ports.
    """
    writer = GraphMLWriter(
        encoding=encoding,
        prettyprint=prettyprint,
        infer_numeric_types=infer_numeric_types,
        named_key_ids=named_key_ids,
        edge_id_from_attribute=edge_id_from_attribute,
    )
    writer.add_graph_element(G)
    writer.dump(path)
-
-
-
@open_file(1, mode="wb")
def write_graphml_lxml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
    edge_id_from_attribute=None,
):
    """Write G in GraphML XML format to path using lxml.

    This function uses the LXML framework and should be faster than the
    version using the standard xml library; when lxml is not installed it
    transparently falls back to :func:`write_graphml_xml`.

    Parameters
    ----------
    G : graph
        An easygraph graph
    path : file or string
        File or filename to write.  Filenames ending in .gz or .bz2 will
        be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    infer_numeric_types : boolean
        Determine if numeric types should be generalized.  For example, if
        edges have both int and float 'weight' attributes, we infer in
        GraphML that both are floats.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.
    edge_id_from_attribute : dict key (optional)
        If provided, the graphml edge id is set by looking up the
        corresponding edge data attribute keyed by this parameter.  If
        `None` or the key does not exist in edge data, the edge id is set
        by the edge key if `G` is a MultiGraph, else left unset.

    Examples
    --------
    >>> G = eg.path_graph(4)
    >>> eg.write_graphml_lxml(G, "fourpath.graphml")

    Notes
    -----
    This implementation does not support mixed graphs (directed and
    undirected edges together), hyperedges, nested graphs, or ports.
    """
    try:
        import lxml.etree as lxmletree  # noqa: F401  availability check only
    except ImportError:
        # lxml not installed: delegate to the stdlib-xml implementation
        return write_graphml_xml(
            G,
            path,
            encoding,
            prettyprint,
            infer_numeric_types,
            named_key_ids,
            edge_id_from_attribute,
        )

    writer = GraphMLWriterLxml(
        path,
        graph=G,
        encoding=encoding,
        prettyprint=prettyprint,
        infer_numeric_types=infer_numeric_types,
        named_key_ids=named_key_ids,
        edge_id_from_attribute=edge_id_from_attribute,
    )
    writer.dump()
-
-
-
def generate_graphml(
    G,
    encoding="utf-8",
    prettyprint=True,
    named_key_ids=False,
    edge_id_from_attribute=None,
):
    r"""Generate GraphML lines for G.

    Parameters
    ----------
    G : graph
        An easygraph graph
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.
    edge_id_from_attribute : dict key (optional)
        If provided, the graphml edge id is set by looking up the
        corresponding edge data attribute keyed by this parameter.  If
        `None` or the key does not exist in edge data, the edge id is set
        by the edge key if `G` is a MultiGraph, else left unset.

    Examples
    --------
    >>> G = eg.path_graph(4)
    >>> linefeed = chr(10)  # linefeed = \n
    >>> s = linefeed.join(eg.generate_graphml(G))
    >>> for line in eg.generate_graphml(G):  # doctest: +SKIP
    ...     print(line)

    Notes
    -----
    This implementation does not support mixed graphs (directed and
    undirected edges together), hyperedges, nested graphs, or ports.
    """
    writer = GraphMLWriter(
        encoding=encoding,
        prettyprint=prettyprint,
        named_key_ids=named_key_ids,
        edge_id_from_attribute=edge_id_from_attribute,
    )
    writer.add_graph_element(G)
    for line in str(writer).splitlines():
        yield line
-
-
-
@open_file(0, mode="rb")
def read_graphml(path, node_type=str, edge_key_type=int, force_multigraph=False):
    """Read graph in GraphML format from path.

    Parameters
    ----------
    path : file or string
        File or filename to read.  Filenames ending in .gz or .bz2 will be
        decompressed.
    node_type : Python type (default: str)
        Convert node ids to this type.
    edge_key_type : Python type (default: int)
        Convert graphml edge ids to this type.  Multigraphs use id as edge
        key; non-multigraphs add it to the edge attribute dict as "id".
    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys.  If False (the
        default) return a multigraph only when multiedges are present.

    Returns
    -------
    graph : EasyGraph graph
        A MultiGraph/MultiDiGraph when parallel edges are present or
        `force_multigraph=True`, otherwise a Graph/DiGraph; directed if
        the file indicates it should be.

    Notes
    -----
    Default node and edge attributes are not propagated to each node and
    edge; they can be obtained from ``G.graph`` ("node_default" /
    "edge_default") and applied manually.

    This implementation does not support mixed graphs (directed and
    undirected edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge key;
    otherwise the "key" attribute, falling back to a default EasyGraph
    multigraph edge key.

    Files with the yEd "yfiles" extension can be read; the node shape is
    preserved in the `shape_type` attribute.  yEd compressed files
    (".graphmlz") can be read after renaming to ".graphml.gz".
    """
    reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # need to check for multiple graphs
    graphs = list(reader(path=path))
    if len(graphs) == 0:
        # Nothing parsed: retry after patching an incomplete <graphml> header.
        header = b'<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
        path.seek(0)
        patched = path.read().replace(b"<graphml>", header)
        graphs = list(reader(string=patched))
        if len(graphs) == 0:
            raise EasyGraphError("file not successfully read as graphml")
    return graphs[0]
-
-
-
def parse_graphml(
    graphml_string, node_type=str, edge_key_type=int, force_multigraph=False
):
    r"""Read graph in GraphML format from string.

    Parameters
    ----------
    graphml_string : string
        String containing graphml information (e.g., contents of a
        graphml file).
    node_type : Python type (default: str)
        Convert node ids to this type.
    edge_key_type : Python type (default: int)
        Convert graphml edge ids to this type.  Multigraphs use id as edge
        key; non-multigraphs add it to the edge attribute dict as "id".
    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys.  If False (the
        default) return a multigraph only when multiedges are present.

    Returns
    -------
    graph : EasyGraph graph
        If no parallel edges are found a Graph or DiGraph is returned.
        Otherwise a MultiGraph or MultiDiGraph is returned.

    Examples
    --------
    >>> G = eg.path_graph(4)
    >>> linefeed = chr(10)  # linefeed = \n
    >>> s = linefeed.join(eg.generate_graphml(G))
    >>> H = eg.parse_graphml(s)

    Notes
    -----
    Default node and edge attributes are not propagated to each node and
    edge; they can be obtained from ``G.graph`` ("node_default" /
    "edge_default") and applied manually.

    This implementation does not support mixed graphs (directed and
    undirected edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge key;
    otherwise the "key" attribute, falling back to a default EasyGraph
    multigraph edge key.
    """
    reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # need to check for multiple graphs
    glist = list(reader(string=graphml_string))
    if len(glist) == 0:
        # If no graph comes back, try looking for an incomplete header
        header = '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
        new_string = graphml_string.replace("<graphml>", header)
        glist = list(reader(string=new_string))
        if len(glist) == 0:
            # consistency fix: use the module-level EasyGraphError import,
            # as read_graphml does, instead of eg.EasyGraphError
            raise EasyGraphError("file not successfully read as graphml")
    return glist[0]
-
-
class GraphML:
    """Shared namespace constants and Python<->GraphML type tables used by
    both the GraphML readers and writers."""

    NS_GRAPHML = "http://graphml.graphdrawing.org/xmlns"
    NS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
    # xmlns:y="http://www.yworks.com/xml/graphml"
    NS_Y = "http://www.yworks.com/xml/graphml"
    SCHEMALOCATION = " ".join(
        [
            "http://graphml.graphdrawing.org/xmlns",
            "http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd",
        ]
    )

    def construct_types(self):
        """Build the type-mapping tables.

        Populates ``self.xml_type`` (Python type -> GraphML ``attr.type``
        name; for duplicate keys the LAST entry wins) and
        ``self.python_type`` (GraphML ``attr.type`` name -> Python type).
        """
        types = [
            (int, "integer"),  # for Gephi GraphML bug
            (str, "yfiles"),
            (str, "string"),
            (int, "int"),
            (int, "long"),
            (float, "float"),
            (float, "double"),
            (bool, "boolean"),
        ]

        # These additions to types allow writing numpy types
        try:
            import numpy as np
        except ImportError:
            pass
        else:
            # prepend so that python types are created upon read (last entry wins)
            np_names = [
                ("float64", "float"),
                ("float32", "float"),
                ("float16", "float"),
                ("float_", "float"),
                ("int_", "int"),
                ("int8", "int"),
                ("int16", "int"),
                ("int32", "int"),
                ("int64", "int"),
                ("uint8", "int"),
                ("uint16", "int"),
                ("uint32", "int"),
                ("uint64", "int"),
                ("intc", "int"),
                ("intp", "int"),
            ]
            # The `np.float_` alias was removed in NumPy 2.0; resolve names
            # via getattr/hasattr so this works on both NumPy 1.x and 2.x.
            np_types = [
                (getattr(np, name), xml_name)
                for name, xml_name in np_names
                if hasattr(np, name)
            ]
            types = np_types + types

        self.xml_type = dict(types)
        self.python_type = dict(reversed(a) for a in types)

    # This page says that data types in GraphML follow Java(TM).
    # http://graphml.graphdrawing.org/primer/graphml-primer.html#AttributesDefinition
    # true and false are the only boolean literals:
    # http://en.wikibooks.org/wiki/Java_Programming/Literals#Boolean_Literals
    convert_bool = {
        # We use data.lower() in actual use.
        "true": True,
        "false": False,
        # Include integer strings for convenience.
        "0": False,
        0: False,
        "1": True,
        1: True,
    }

    def get_xml_type(self, key):
        """Wrapper around the xml_type dict that raises a more informative
        exception message when a user attempts to use data of a type not
        supported by GraphML."""
        try:
            return self.xml_type[key]
        except KeyError as err:
            raise TypeError(
                f"GraphML does not support type {type(key)} as data values."
            ) from err
-
-
-
def attr_type(self, name, scope, value):
    """Infer the GraphML attribute type of data named *name* in *scope*.

    If ``self.infer_numeric_types`` is false, ``type(value)`` is used.
    Otherwise, pick the most general of the types seen across all values
    sharing ``(name, scope)`` — so edges with data named 'weight' are
    generalized separately from nodes with data named 'weight'.
    """
    if not self.infer_numeric_types:
        return type(value)
    seen = self.attribute_types[(name, scope)]
    if len(seen) <= 1:
        # Zero/one observed type: nothing to generalize.
        return list(seen)[0]
    xml_names = {self.get_xml_type(t) for t in seen}
    if "string" in xml_names:
        return str
    if "float" in xml_names or "double" in xml_names:
        return float
    return int
def add_data(self, name, element_type, value, scope="all", default=None):
    """Make a <data> element for an edge or a node, registering the
    corresponding <key> declaration in the keys table as a side effect."""
    if element_type not in self.xml_type:
        raise eg.EasyGraphError(
            f"GraphML writer does not support {element_type} as data values."
        )
    key_id = self.get_key(name, self.get_xml_type(element_type), scope, default)
    data_element = self.myElement("data", key=key_id)
    data_element.text = str(value)
    return data_element
-
-
def add_attributes(self, scope, xml_obj, data, default):
    """Record attribute data destined for *xml_obj* and log each value's
    type; the actual <data> elements are created later (after type
    inference) by add_graph_element."""
    for key, value in data.items():
        self.attribute_types[(str(key), scope)].add(type(value))
        self.attributes[xml_obj].append([key, value, scope, default.get(key)])
def add_graph_element(self, G):
    """Serialize graph G in GraphML to the stream."""
    if G.is_directed():
        default_edge_type = "directed"
    else:
        default_edge_type = "undirected"

    graphid = G.graph.pop("id", None)
    if graphid is None:
        graph_element = self.myElement("graph", edgedefault=default_edge_type)
    else:
        graph_element = self.myElement(
            "graph", edgedefault=default_edge_type, id=graphid
        )
    default = {}
    # Graph-level data, excluding the default-attribute containers.
    data = {
        k: v
        for (k, v) in G.graph.items()
        if k not in ["node_default", "edge_default"]
    }
    self.add_attributes("graph", graph_element, data, default)
    self.add_nodes(G, graph_element)
    self.add_edges(G, graph_element)

    # self.attributes maps XML objects to the data that still needs to be
    # attached to them.  Processing is postponed so that attr_type can
    # infer/generalize types across the whole graph.
    for xml_obj, items in self.attributes.items():
        for k, v, scope, default_value in items:
            xml_obj.append(
                self.add_data(
                    str(k), self.attr_type(k, scope, v), str(v), scope, default_value
                )
            )
    self.xml.append(graph_element)
-
-
def add_graphs(self, graph_list):
    """Add many graphs to this GraphML document."""
    for graph in graph_list:
        self.add_graph_element(graph)
-
-
class IncrementalElement:
    """Wrapper for _IncrementalWriter providing an Element-like interface.

    This wrapper does not intend to be a complete implementation but rather
    to support the calls used in GraphMLWriter (``append`` only), forwarding
    them to the underlying incremental XML writer.
    """

    def __init__(self, xml, prettyprint):
        self.xml = xml
        self.prettyprint = prettyprint

    def append(self, element):
        self.xml.write(element, pretty_print=self.prettyprint)
-
-
class GraphMLWriterLxml(GraphMLWriter):
    """GraphML writer that streams output incrementally via lxml.

    Unlike the ElementTree-based writer it does not hold the whole document
    in memory: nodes and edges are written as they are produced, and only
    the <key> declarations are buffered until the graph element is opened.
    """

    def __init__(
        self,
        path,
        graph=None,
        encoding="utf-8",
        prettyprint=True,
        infer_numeric_types=False,
        named_key_ids=False,
        edge_id_from_attribute=None,
    ):
        self.construct_types()
        import lxml.etree as lxmletree

        self.myElement = lxmletree.Element

        self._encoding = encoding
        self._prettyprint = prettyprint
        self.named_key_ids = named_key_ids
        self.edge_id_from_attribute = edge_id_from_attribute
        self.infer_numeric_types = infer_numeric_types

        self._xml_base = lxmletree.xmlfile(path, encoding=encoding)
        self._xml = self._xml_base.__enter__()
        self._xml.write_declaration()

        # We need an xml variable that supports insertion; it is used for
        # adding the keys to the document.  Keys are stored in a plain list
        # and flushed into the main graphml element after the graph element
        # is closed.
        self.xml = []
        self._keys = self.xml
        self._graphml = self._xml.element(
            "graphml",
            {
                "xmlns": self.NS_GRAPHML,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
            },
        )
        self._graphml.__enter__()
        self.keys = {}
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)

    def add_graph_element(self, G):
        """Serialize graph G in GraphML to the stream."""
        if G.is_directed():
            default_edge_type = "directed"
        else:
            default_edge_type = "undirected"

        graphid = G.graph.pop("id", None)
        if graphid is None:
            graph_element = self._xml.element("graph", edgedefault=default_edge_type)
        else:
            graph_element = self._xml.element(
                "graph", edgedefault=default_edge_type, id=graphid
            )

        # Gather attribute types across the whole graph first to find the
        # most general numeric format needed, then create a key_id for each.
        graphdata = {
            k: v
            for k, v in G.graph.items()
            if k not in ("node_default", "edge_default")
        }
        node_default = G.graph.get("node_default", {})
        edge_default = G.graph.get("edge_default", {})
        # Graph attributes
        for k, v in graphdata.items():
            self.attribute_types[(str(k), "graph")].add(type(v))
        for k, v in graphdata.items():
            element_type = self.get_xml_type(self.attr_type(k, "graph", v))
            self.get_key(str(k), element_type, "graph", None)
        # Nodes and data
        for node, d in G.nodes.items():
            for k, v in d.items():
                self.attribute_types[(str(k), "node")].add(type(v))
        for node, d in G.nodes.items():
            for k, v in d.items():
                T = self.get_xml_type(self.attr_type(k, "node", v))
                self.get_key(str(k), T, "node", node_default.get(k))
        # Edges and data (use distinct names for the inner loop so the edge
        # endpoint `v` is not shadowed)
        if G.is_multigraph():
            for u, v, ekey, d in G.edges:
                for attr_k, attr_v in d.items():
                    self.attribute_types[(str(attr_k), "edge")].add(type(attr_v))
            for u, v, ekey, d in G.edges:
                for attr_k, attr_v in d.items():
                    T = self.get_xml_type(self.attr_type(attr_k, "edge", attr_v))
                    self.get_key(str(attr_k), T, "edge", edge_default.get(attr_k))
        else:
            for u, v, d in G.edges:
                for attr_k, attr_v in d.items():
                    self.attribute_types[(str(attr_k), "edge")].add(type(attr_v))
            for u, v, d in G.edges:
                for attr_k, attr_v in d.items():
                    T = self.get_xml_type(self.attr_type(attr_k, "edge", attr_v))
                    self.get_key(str(attr_k), T, "edge", edge_default.get(attr_k))

        # Now add attribute keys to the xml file
        for key in self.xml:
            self._xml.write(key, pretty_print=self._prettyprint)

        # The incremental writer emits each node/edge as it is created
        incremental_writer = IncrementalElement(self._xml, self._prettyprint)
        with graph_element:
            self.add_attributes("graph", incremental_writer, graphdata, {})
            self.add_nodes(G, incremental_writer)  # adds attributes too
            self.add_edges(G, incremental_writer)  # adds attributes too

    def add_attributes(self, scope, xml_obj, data, default):
        """Append attribute data immediately (no deferred pass here)."""
        for k, v in data.items():
            data_element = self.add_data(
                str(k), self.attr_type(str(k), scope, v), str(v), scope, default.get(k)
            )
            xml_obj.append(data_element)

    def __str__(self):
        return object.__str__(self)

    def dump(self):
        # Close the <graphml> element, then the underlying xmlfile context.
        self._graphml.__exit__(None, None, None)
        self._xml_base.__exit__(None, None, None)
-
-
# Default writer: use the lxml-based implementation when lxml is present.
write_graphml=write_graphml_lxml
-
-
-
class GraphMLReader(GraphML):
    """Read a GraphML document.  Produces EasyGraph graph objects."""

    def __init__(self, node_type=str, edge_key_type=int, force_multigraph=False):
        self.construct_types()
        self.node_type = node_type
        self.edge_key_type = edge_key_type
        self.multigraph = force_multigraph  # If False, test for multiedges
        self.edge_ids = {}  # dict mapping (u,v) tuples to edge id attributes

    def __call__(self, path=None, string=None):
        from xml.etree.ElementTree import ElementTree
        from xml.etree.ElementTree import fromstring

        if path is not None:
            self.xml = ElementTree(file=path)
        elif string is not None:
            self.xml = fromstring(string)
        else:
            raise ValueError("Must specify either 'path' or 'string' as kwarg")
        (keys, defaults) = self.find_graphml_keys(self.xml)
        for g in self.xml.findall(f"{{{self.NS_GRAPHML}}}graph"):
            yield self.make_graph(g, keys, defaults)

    def make_graph(self, graph_xml, graphml_keys, defaults, G=None):
        """Build a graph from one <graph> element."""
        # set default graph type
        edgedefault = graph_xml.get("edgedefault", None)
        if G is None:
            if edgedefault == "directed":
                G = eg.MultiDiGraph()
            else:
                G = eg.MultiGraph()
        # set defaults for graph attributes
        G.graph["node_default"] = {}
        G.graph["edge_default"] = {}
        for key_id, value in defaults.items():
            key_for = graphml_keys[key_id]["for"]
            name = graphml_keys[key_id]["name"]
            python_type = graphml_keys[key_id]["type"]
            if key_for == "node":
                G.graph["node_default"].update({name: python_type(value)})
            if key_for == "edge":
                G.graph["edge_default"].update({name: python_type(value)})
        # hyperedges are not supported
        hyperedge = graph_xml.find(f"{{{self.NS_GRAPHML}}}hyperedge")
        if hyperedge is not None:
            raise eg.EasyGraphError("GraphML reader doesn't support hyperedges")
        # add nodes
        for node_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}node"):
            self.add_node(G, node_xml, graphml_keys, defaults)
        # add edges
        for edge_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}edge"):
            self.add_edge(G, edge_xml, graphml_keys)
        # add graph data
        data = self.decode_data_elements(graphml_keys, graph_xml)
        G.graph.update(data)

        # switch to Graph or DiGraph if no parallel edges were found
        if self.multigraph:
            return G

        G = eg.DiGraph(G) if G.is_directed() else eg.Graph(G)
        # add explicit edge "id" from file as attribute in eg graph.
        eg.set_edge_attributes(G, values=self.edge_ids, name="id")
        return G

    def add_node(self, G, node_xml, graphml_keys, defaults):
        """Add a node to the graph."""
        # warn on finding unsupported ports tag
        ports = node_xml.find(f"{{{self.NS_GRAPHML}}}port")
        if ports is not None:
            warnings.warn("GraphML port tag not supported.")
        # find the node by id and cast it to the appropriate type
        node_id = self.node_type(node_xml.get("id"))
        # get data/attributes for node
        data = self.decode_data_elements(graphml_keys, node_xml)
        G.add_node(node_id, **data)
        # get child nodes (yEd group nodes contain a nested <graph>)
        if node_xml.attrib.get("yfiles.foldertype") == "group":
            graph_xml = node_xml.find(f"{{{self.NS_GRAPHML}}}graph")
            self.make_graph(graph_xml, graphml_keys, defaults, G)

    def add_edge(self, G, edge_element, graphml_keys):
        """Add an edge to the graph."""
        # warn on finding unsupported ports tag
        ports = edge_element.find(f"{{{self.NS_GRAPHML}}}port")
        if ports is not None:
            warnings.warn("GraphML port tag not supported.")

        # raise error if we find mixed directed and undirected edges
        directed = edge_element.get("directed")
        if G.is_directed() and directed == "false":
            msg = "directed=false edge found in directed graph."
            raise eg.EasyGraphError(msg)
        if (not G.is_directed()) and directed == "true":
            msg = "directed=true edge found in undirected graph."
            raise eg.EasyGraphError(msg)

        source = self.node_type(edge_element.get("source"))
        target = self.node_type(edge_element.get("target"))
        data = self.decode_data_elements(graphml_keys, edge_element)
        # GraphML stores edge ids as an attribute.
        # EasyGraph uses them as keys in multigraphs too if no key
        # attribute is specified.
        edge_id = edge_element.get("id")
        if edge_id:
            # self.edge_ids is used by `make_graph` method for non-multigraphs
            self.edge_ids[source, target] = edge_id
            try:
                edge_id = self.edge_key_type(edge_id)
            except ValueError:  # Could not convert.
                pass
        else:
            edge_id = data.get("key")

        if G.has_edge(source, target):
            # mark this as a multigraph
            self.multigraph = True

        # Use add_edges_from to avoid error with add_edge when `'key' in data`
        # Note there is only one edge here...
        G.add_edges_from([(source, target, edge_id, data)])

    def decode_data_elements(self, graphml_keys, obj_xml):
        """Use the key information to decode the data XML if present."""
        data = {}
        for data_element in obj_xml.findall(f"{{{self.NS_GRAPHML}}}data"):
            key = data_element.get("key")
            try:
                data_name = graphml_keys[key]["name"]
                data_type = graphml_keys[key]["type"]
            except KeyError as err:
                raise eg.EasyGraphError(f"Bad GraphML data: no key {key}") from err
            text = data_element.text
            # assume anything with subelements is a yfiles extension
            if text is not None and len(list(data_element)) == 0:
                if data_type == bool:
                    # Ignore cases.
                    # http://docs.oracle.com/javase/6/docs/api/java/lang/
                    # Boolean.html#parseBoolean%28java.lang.String%29
                    data[data_name] = self.convert_bool[text.lower()]
                else:
                    data[data_name] = data_type(text)
            elif len(list(data_element)) > 0:
                # Assume yfiles subelements; try to extract node_label
                node_label = None
                # set GenericNode's configuration as shape type
                gn = data_element.find(f"{{{self.NS_Y}}}GenericNode")
                # Explicit None check: Element truthiness is deprecated and
                # is False for a matched element that has no children.
                if gn is not None:
                    data["shape_type"] = gn.get("configuration")
                for node_type in ["GenericNode", "ShapeNode", "SVGNode", "ImageNode"]:
                    pref = f"{{{self.NS_Y}}}{node_type}/{{{self.NS_Y}}}"
                    geometry = data_element.find(f"{pref}Geometry")
                    if geometry is not None:
                        data["x"] = geometry.get("x")
                        data["y"] = geometry.get("y")
                    if node_label is None:
                        node_label = data_element.find(f"{pref}NodeLabel")
                    shape = data_element.find(f"{pref}Shape")
                    if shape is not None:
                        data["shape_type"] = shape.get("type")
                if node_label is not None:
                    data["label"] = node_label.text

                # check all the different types of edges available in yEd.
                for edge_type in [
                    "PolyLineEdge",
                    "SplineEdge",
                    "QuadCurveEdge",
                    "BezierEdge",
                    "ArcEdge",
                ]:
                    pref = f"{{{self.NS_Y}}}{edge_type}/{{{self.NS_Y}}}"
                    edge_label = data_element.find(f"{pref}EdgeLabel")
                    if edge_label is not None:
                        break

                if edge_label is not None:
                    data["label"] = edge_label.text
        return data

    def find_graphml_keys(self, graph_element):
        """Extract all the keys and key defaults from the xml."""
        graphml_keys = {}
        graphml_key_defaults = {}
        for k in graph_element.findall(f"{{{self.NS_GRAPHML}}}key"):
            attr_id = k.get("id")
            attr_type = k.get("attr.type")
            attr_name = k.get("attr.name")
            yfiles_type = k.get("yfiles.type")
            if yfiles_type is not None:
                attr_name = yfiles_type
                attr_type = "yfiles"
            if attr_type is None:
                attr_type = "string"
                warnings.warn(f"No key type for id {attr_id}. Using string")
            if attr_name is None:
                raise eg.EasyGraphError(f"Unknown key for id {attr_id}.")
            graphml_keys[attr_id] = {
                "name": attr_name,
                "type": self.python_type[attr_type],
                "for": k.get("for"),
            }
            # check for "default" sub-element of key element
            default = k.find(f"{{{self.NS_GRAPHML}}}default")
            if default is not None:
                # Handle default values identically to data element values
                python_type = graphml_keys[attr_id]["type"]
                if python_type == bool:
                    graphml_key_defaults[attr_id] = self.convert_bool[
                        default.text.lower()
                    ]
                else:
                    graphml_key_defaults[attr_id] = python_type(default.text)
        return graphml_keys, graphml_key_defaults
def from_agraph(A, create_using=None):
    """Return an EasyGraph Graph or DiGraph from a PyGraphviz graph.

    Parameters
    ----------
    A : PyGraphviz AGraph
      A graph created with PyGraphviz

    create_using : EasyGraph graph constructor, optional (default=None)
       Graph type to create. If graph instance, then cleared before populated.
       If `None`, then the appropriate Graph type is inferred from `A`.

    Examples
    --------
    >>> K5 = eg.complete_graph(5)
    >>> A = eg.to_agraph(K5)
    >>> G = eg.from_agraph(A)

    Notes
    -----
    Default graphviz attributes for graphs, nodes and edges are stored
    under ``N.graph["graph"]``, ``N.graph["node"]`` and ``N.graph["edge"]``.
    Edge attributes are returned as edge data in G.
    """
    # Infer the graph class from A's directedness/strictness when not given.
    if create_using is None:
        if A.is_directed():
            create_using = eg.DiGraph if A.is_strict() else eg.MultiDiGraph
        else:
            create_using = eg.Graph if A.is_strict() else eg.MultiGraph

    # assign defaults
    N = eg.empty_graph(0, create_using)
    if A.name is not None:
        N.name = A.name

    # add graph attributes
    N.graph.update(A.graph_attr)

    # add nodes with their attributes (keys coerced to str)
    for n in A.nodes():
        str_attr = {str(k): v for k, v in n.attr.items()}
        N.add_node(str(n), **str_attr)

    # add edges, assigning edge data as a dictionary of attributes
    for e in A.edges():
        u, v = str(e[0]), str(e[1])
        attr = dict(e.attr)
        str_attr = {str(k): v for k, v in attr.items()}
        if not N.is_multigraph():
            if e.name is not None:
                str_attr["key"] = e.name
            N.add_edge(u, v, **str_attr)
        else:
            N.add_edge(u, v, key=e.name, **str_attr)

    # add default attributes for graph, nodes, and edges;
    # hang them on N.graph_attr
    N.graph["graph"] = dict(A.graph_attr)
    N.graph["node"] = dict(A.node_attr)
    N.graph["edge"] = dict(A.edge_attr)
    return N
-
-
-
def to_agraph(N):
    """Return a pygraphviz graph from an EasyGraph graph N.

    Parameters
    ----------
    N : EasyGraph graph
      A graph created with EasyGraph

    Examples
    --------
    >>> K5 = eg.complete_graph(5)
    >>> A = eg.to_agraph(K5)

    Notes
    -----
    If N has a dict N.graph_attr, an attempt will be made first to copy
    properties attached to the graph (see from_agraph) and then updated
    with the calling arguments, if any.
    """
    try:
        import pygraphviz
    except ImportError as err:
        raise ImportError("requires pygraphviz http://pygraphviz.github.io/") from err
    directed = N.is_directed()
    # A graphviz graph is "strict" when it has no self loops or parallel edges.
    strict = eg.number_of_selfloops(N) == 0 and not N.is_multigraph()
    A = pygraphviz.AGraph(name=N.name, strict=strict, directed=directed)

    # default graph attributes
    A.graph_attr.update(N.graph.get("graph", {}))
    A.node_attr.update(N.graph.get("node", {}))
    A.edge_attr.update(N.graph.get("edge", {}))

    A.graph_attr.update(
        (k, v) for k, v in N.graph.items() if k not in ("graph", "node", "edge")
    )

    # add nodes
    for n, nodedata in N.nodes.items():
        A.add_node(n)
        # Add node data
        a = A.get_node(n)
        a.attr.update({k: str(v) for k, v in nodedata.items()})

    # loop over edges
    if N.is_multigraph():
        for u, v, key, edgedata in N.edges:
            str_edgedata = {k: str(v) for k, v in edgedata.items() if k != "key"}
            A.add_edge(u, v, key=str(key))
            # Add edge data
            a = A.get_edge(u, v)
            a.attr.update(str_edgedata)
    else:
        for u, v, edgedata in N.edges:
            str_edgedata = {k: str(v) for k, v in edgedata.items()}
            A.add_edge(u, v)
            # Add edge data
            a = A.get_edge(u, v)
            a.attr.update(str_edgedata)

    return A
-
-
-
def write_dot(G, path):
    """Write EasyGraph graph G to Graphviz dot format on path.

    Parameters
    ----------
    G : graph
       An easygraph graph
    path : filename
       Filename or file handle to write
    """
    A = to_agraph(G)
    A.write(path)
    A.clear()
-
-
-
def read_dot(path):
    """Return an EasyGraph graph from a dot file on path.

    Parameters
    ----------
    path : file or string
       File name or file handle to read.
    """
    try:
        import pygraphviz
    except ImportError as err:
        raise ImportError(
            "read_dot() requires pygraphviz http://pygraphviz.github.io/"
        ) from err
    A = pygraphviz.AGraph(file=path)
    gr = from_agraph(A)
    A.clear()
    return gr
-# This file is part of the NetworkX distribution.
-
-# NetworkX is distributed with the 3-clause BSD license.
-
-
-# ::
-# Copyright (C) 2004-2022, NetworkX Developers
-# Aric Hagberg <hagberg@lanl.gov>
-# Dan Schult <dschult@colgate.edu>
-# Pieter Swart <swart@lanl.gov>
-# All rights reserved.
-
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials provided
-# with the distribution.
-
-# * Neither the name of the NetworkX Developers nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-"""
-*****
-Pajek
-*****
-Read graphs in Pajek format.
-
-This implementation handles directed and undirected graphs including
-those with self loops and parallel edges.
-
-Format
-------
-See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
-for format information.
-
-"""
-
-importwarnings
-
-importeasygraphaseg
-
-# import networkx as nx
-fromeasygraph.utilsimportopen_file
-
-
-__all__=["read_pajek","parse_pajek","generate_pajek","write_pajek"]
-
-
-
def generate_pajek(G):
    """Generate lines in Pajek graph format.

    Parameters
    ----------
    G : graph
       An EasyGraph graph

    References
    ----------
    See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
    for format information.
    """
    if G.name == "":
        name = "EasyGraph"
    else:
        name = G.name
    # Apparently many Pajek format readers can't process this line
    # So we'll leave it out for now.
    # yield '*network %s'%name

    # write nodes with attributes
    yield f"*vertices {G.order()}"
    nodes = list(G)
    # make dictionary mapping nodes to integers
    nodenumber = dict(zip(nodes, range(1, len(nodes) + 1)))
    for n in nodes:
        # copy node attributes and pop mandatory attributes
        # to avoid duplication.
        na = G.nodes.get(n, {}).copy()
        x = na.pop("x", 0.0)
        y = na.pop("y", 0.0)
        try:
            id = int(na.pop("id", nodenumber[n]))
        except ValueError as err:
            err.args += (
                "Pajek format requires 'id' to be an int()."
                " Refer to the 'Relabeling nodes' section.",
            )
            raise
        nodenumber[n] = id
        shape = na.pop("shape", "ellipse")
        s = " ".join(map(make_qstr, (id, n, x, y, shape)))
        # only optional attributes are left in na.
        for k, v in na.items():
            if isinstance(v, str) and v.strip() != "":
                s += f" {make_qstr(k)} {make_qstr(v)}"
            else:
                warnings.warn(
                    f"Node attribute {k} is not processed."
                    f" {'Empty attribute' if isinstance(v, str) else 'Non-string attribute'}."
                )
        yield s

    # write edges with attributes
    if G.is_directed():
        yield "*arcs"
    else:
        yield "*edges"
    for u, v, *edgedata in G.edges:
        # G.edges yields (u, v, d) or (u, v, key, d); the data dict is last.
        edgedata = edgedata[-1]
        d = edgedata.copy()
        value = d.pop("weight", 1.0)  # use 1 as default edge value
        s = " ".join(map(make_qstr, (nodenumber[u], nodenumber[v], value)))
        for k, v in d.items():
            if isinstance(v, str) and v.strip() != "":
                s += f" {make_qstr(k)} {make_qstr(v)}"
            else:
                warnings.warn(
                    f"Edge attribute {k} is not processed."
                    f" {'Empty attribute' if isinstance(v, str) else 'Non-string attribute'}."
                )
        yield s
-
-
-
@open_file(1, mode="wb")
def write_pajek(G, path, encoding="UTF-8"):
    """Write graph in Pajek format to path.

    Parameters
    ----------
    G : graph
       An EasyGraph graph
    path : file or string
       File or filename to write.
       Filenames ending in .gz or .bz2 will be compressed.

    Examples
    --------
    >>> G = eg.path_graph(4)
    >>> eg.write_pajek(G, "test.net")

    Warnings
    --------
    Optional node attributes and edge attributes must be non-empty strings.
    Otherwise they will not be written into the file. You will need to
    convert those attributes to strings if you want to keep them.

    References
    ----------
    See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
    for format information.
    """
    for line in generate_pajek(G):
        path.write((line + "\n").encode(encoding))
-
-
-
@open_file(0, mode="rb")
def read_pajek(path):
    """Read graph in Pajek format from path.

    Parameters
    ----------
    path : file or string
       File or filename to read.
       Filenames ending in .gz or .bz2 will be uncompressed.

    Returns
    -------
    G : EasyGraph MultiGraph or MultiDiGraph.

    Examples
    --------
    >>> G = eg.path_graph(4)
    >>> eg.write_pajek(G, "test.net")
    >>> G = eg.read_pajek("test.net")

    To create a Graph instead of a MultiGraph use

    >>> G1 = eg.Graph(G)

    References
    ----------
    See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
    for format information.
    """
    lines = (line.decode() for line in path)
    return parse_pajek(lines)
-
-
-
def parse_pajek(lines):
    """Parse Pajek format graph from string or iterable.

    Parameters
    ----------
    lines : string or iterable
       Data in Pajek format.

    Returns
    -------
    G : EasyGraph graph

    See Also
    --------
    read_pajek
    """
    import shlex

    if isinstance(lines, str):
        lines = iter(lines.split("\n"))
    lines = iter([line.rstrip("\n") for line in lines])
    G = eg.MultiDiGraph()  # are multiedges allowed in Pajek? assume yes
    labels = []  # in the order of the file, needed for matrix
    while lines:
        try:
            l = next(lines)
        except StopIteration:  # EOF
            break
        if l.lower().startswith("*network"):
            try:
                label, name = l.split(None, 1)
            except ValueError:
                # Line was not of the form: *network NAME
                pass
            else:
                G.graph["name"] = name
        elif l.lower().startswith("*vertices"):
            nodelabels = {}
            l, nnodes = l.split()
            for i in range(int(nnodes)):
                l = next(lines)
                try:
                    splitline = [x for x in shlex.split(str(l))]
                except AttributeError:
                    splitline = shlex.split(str(l))
                id, label = splitline[0:2]
                labels.append(label)
                G.add_node(label)
                nodelabels[id] = label
                G.nodes[label]["id"] = id
                try:
                    # Position/shape columns are optional; skip them when
                    # absent or non-numeric.
                    x, y, shape = splitline[2:5]
                    G.nodes[label].update(
                        {"x": float(x), "y": float(y), "shape": shape}
                    )
                except ValueError:
                    pass
                extra_attr = zip(splitline[5::2], splitline[6::2])
                G.nodes[label].update(extra_attr)
        elif l.lower().startswith("*edges") or l.lower().startswith("*arcs"):
            if l.lower().startswith("*edge"):
                # switch from multidigraph to multigraph
                G = eg.MultiGraph(G)
            if l.lower().startswith("*arcs"):
                # switch to directed with multiple arcs for each existing edge
                # G = G.to_directed()
                pass
            for l in lines:
                try:
                    splitline = [x for x in shlex.split(str(l))]
                except AttributeError:
                    splitline = shlex.split(str(l))

                if len(splitline) < 2:
                    continue
                ui, vi = splitline[0:2]
                u = nodelabels.get(ui, ui)
                v = nodelabels.get(vi, vi)
                # parse the data attached to this edge and put in a dictionary
                edge_data = {}
                try:
                    # there should always be a single value on the edge?
                    w = splitline[2:3]
                    edge_data.update({"weight": float(w[0])})
                except (IndexError, ValueError):
                    # no weight column (or not a number); leave it unset
                    pass
                extra_attr = zip(splitline[3::2], splitline[4::2])
                edge_data.update(extra_attr)
                G.add_edge(u, v, **edge_data)
        elif l.lower().startswith("*matrix"):
            G = eg.DiGraph(G)
            adj_list = (
                (labels[row], labels[col], {"weight": int(data)})
                for (row, line) in enumerate(lines)
                for (col, data) in enumerate(line.split())
                if int(data) != 0
            )
            G.add_edges_from(adj_list)

    return G
-
-
def make_qstr(t):
    """Return the string representation of t, wrapped in double quotes
    when it contains a space (as Pajek requires)."""
    text = t if isinstance(t, str) else str(t)
    return f'"{text}"' if " " in text else text
-
-"""
-**************
-UCINET DL
-**************
-Read and write graphs in UCINET DL format.
-This implementation currently supports only the 'fullmatrix' data format.
-Format
-------
-The UCINET DL format is the most common file format used by UCINET package.
-Basic example:
-DL N = 5
-Data:
-0 1 1 1 1
-1 0 1 0 0
-1 1 0 0 1
-1 0 0 0 0
-1 0 1 0 0
-References
-----------
- See UCINET User Guide or http://www.analytictech.com/ucinet/help/hs5000.htm
- for full format information. Short version on http://www.analytictech.com/networks/dataentry.htm
-"""
-
-
-importre
-importshlex
-
-importeasygraphaseg
-importnumpyasnp
-
-fromeasygraph.utilsimportopen_file
-
-
-__all__=["generate_ucinet","read_ucinet","parse_ucinet","write_ucinet"]
-
-
-
def generate_ucinet(G):
    """Generate lines in UCINET DL graph format ('fullmatrix').

    Parameters
    ----------
    G : graph
       An EasyGraph graph

    References
    ----------
    See UCINET User Guide or http://www.analytictech.com/ucinet/help/hs5000.htm
    for full format information. Short version on
    http://www.analytictech.com/networks/dataentry.htm
    """
    n = G.number_of_nodes()
    nodes = sorted(list(G.nodes))
    yield "dl n=%i format=fullmatrix" % n

    # Emit a labels section only when node names are not plain integers.
    try:
        int(nodes[0])
    except ValueError:
        s = "labels:\n"
        for label in nodes:
            s += label + " "
        yield s

    yield "data:"

    matrix = str(np.asmatrix(eg.to_numpy_array(G, nodelist=nodes, dtype=int)))
    yield matrix.replace("[", " ").replace("]", " ").lstrip().rstrip()
-
-
-
[docs]@open_file(0,mode="rb")
-defread_ucinet(path,encoding="UTF-8"):
-"""Read graph in UCINET format from path.
- Parameters
- ----------
- path : file or string
- File or filename to read.
- Filenames ending in .gz or .bz2 will be uncompressed.
- Returns
- -------
- G : EasyGraph MultiGraph or MultiDiGraph.
- Examples
- --------
- >>> G=eg.path_graph(4)
- >>> eg.write_ucinet(G, "test.dl")
- >>> G=eg.read_ucinet("test.dl")
- To create a Graph instead of a MultiGraph use
- >>> G1=eg.Graph(G)
- See Also
- --------
- parse_ucinet()
- References
- ----------
- See UCINET User Guide or http://www.analytictech.com/ucinet/help/hs5000.htm
- for full format information. Short version on http://www.analytictech.com/networks/dataentry.htm
- """
- lines=(line.decode(encoding)forlineinpath)
- returnparse_ucinet(lines)
-
-
-
[docs]@open_file(1,mode="wb")
-defwrite_ucinet(G,path,encoding="UTF-8"):
-"""Write graph in UCINET format to path.
- Parameters
- ----------
- G : graph
- A EasyGraph graph
- path : file or string
- File or filename to write.
- Filenames ending in .gz or .bz2 will be compressed.
- Examples
- --------
- >>> G=eg.path_graph(4)
- >>> eg.write_ucinet(G, "test.net")
- References
- ----------
- See UCINET User Guide or http://www.analytictech.com/ucinet/help/hs5000.htm
- for full format information. Short version on http://www.analytictech.com/networks/dataentry.htm
- """
- forlineingenerate_ucinet(G):
- line+="\n"
- path.write(line.encode(encoding))
-
-
-
[docs]defparse_ucinet(lines):
-"""Parse UCINET format graph from string or iterable.
- Currently only the 'fullmatrix', 'nodelist1' and 'nodelist1b' formats are supported.
- Parameters
- ----------
- lines : string or iterable
- Data in UCINET format.
- Returns
- -------
- G : EasyGraph graph
- See Also
- --------
- read_ucinet()
- References
- ----------
- See UCINET User Guide or http://www.analytictech.com/ucinet/help/hs5000.htm
- for full format information. Short version on http://www.analytictech.com/networks/dataentry.htm
- """
- fromnumpyimportgenfromtxt
- fromnumpyimportisnan
- fromnumpyimportreshape
-
- G=eg.MultiDiGraph()
-
- ifnotisinstance(lines,str):
- s=""
- forlineinlines:
- iftype(line)==bytes:
- s+=line.decode("utf-8")
- else:
- s+=line
- lines=s
- lexer=shlex.shlex(lines.lower())
- lexer.whitespace+=",="
- lexer.whitespace_split=True
-
- number_of_nodes=0
- number_of_matrices=0
- nr=0# number of rows (rectangular matrix)
- nc=0# number of columns (rectangular matrix)
- ucinet_format="fullmatrix"# Format by default
- labels={}# Contains labels of nodes
- row_labels_embedded=False# Whether labels are embedded in data or not
- cols_labels_embedded=False
- diagonal=True# whether the main diagonal is present or absent
-
- KEYWORDS=("format","data:","labels:")# TODO remove ':' in keywords
-
- whilelexer:
- try:
- token=next(lexer)
- exceptStopIteration:
- break
- # print "Token : %s" % token
- iftoken.startswith("n"):
- iftoken.startswith("nr"):
- nr=int(get_param(r"\d+",token,lexer))
- number_of_nodes=max(nr,nc)
- eliftoken.startswith("nc"):
- nc=int(get_param(r"\d+",token,lexer))
- number_of_nodes=max(nr,nc)
- eliftoken.startswith("nm"):
- number_of_matrices=int(get_param(r"\d+",token,lexer))
- else:
- number_of_nodes=int(get_param(r"\d+",token,lexer))
- nr=number_of_nodes
- nc=number_of_nodes
-
- eliftoken.startswith("diagonal"):
- diagonal=get_param("present|absent",token,lexer)
-
- eliftoken.startswith("format"):
- ucinet_format=get_param(
-"""^(fullmatrix|upperhalf|lowerhalf|nodelist1|nodelist2|nodelist1b|\
-edgelist1|edgelist2|blockmatrix|partition)$""",
- token,
- lexer,
- )
-
- # TODO : row and columns labels
- eliftoken.startswith("row"):# Row labels
- pass
- eliftoken.startswith("column"):# Columns labels
- pass
-
- eliftoken.startswith("labels"):
- token=next(lexer)
- i=0
- whiletokennotinKEYWORDS:
- iftoken.startswith("embedded"):
- row_labels_embedded=True
- cols_labels_embedded=True
- break
- else:
- labels[i]=token.replace(
- '"',""
- )# for labels with embedded spaces
- i+=1
- try:
- token=next(lexer)
- exceptStopIteration:
- break
- eliftoken.startswith("data"):
- break
-
- data_lines=lines.lower().split("data:",1)[1]
- # Generate edges
- params={}
- ifcols_labels_embedded:
- # params['names'] = True
- labels=dict(zip(range(0,nc),data_lines.splitlines()[1].split()))
- # params['skip_header'] = 2 # First character is \n
- ifrow_labels_embedded:# Skip first column
- # TODO rectangular case : labels can differ from rows to columns
- # params['usecols'] = range(1, nc + 1)
- pass
-
- ifucinet_format=="fullmatrix":
- # In Python3 genfromtxt requires bytes string
- try:
- data_lines=bytes(data_lines,"utf-8")
- exceptTypeError:
- pass
- # Do not use splitlines() because it is not necessarily written as a square matrix
- data=genfromtxt([data_lines],case_sensitive=False,**params)
- ifcols_labels_embeddedorrow_labels_embedded:
- # data = insert(data, 0, float('nan'))
- data=data[~isnan(data)]
- mat=reshape(data,(max(number_of_nodes,nr),-1))
- G=eg.from_numpy_array(mat,create_using=eg.MultiDiGraph())
-
- elifucinet_formatin(
- "nodelist1",
- "nodelist1b",
- ):# Since genfromtxt only accepts square matrix...
- s=""
- fori,lineinenumerate(data_lines.splitlines()):
- row=line.split()
- ifrow:
- ifucinet_format=="nodelist1b"androw[0]=="0":
- pass
- else:
- forneighborinrow[1:]:
- ifucinet_format=="nodelist1":
- source=row[0]
- else:
- source=str(i)
- s+=source+" "+neighbor+"\n"
-
- G=eg.parse_edgelist(
- s.splitlines(),
- nodetype=strifrow_labels_embeddedandcols_labels_embeddedelseint,
- create_using=eg.MultiDiGraph(),
- )
-
- ifnotrow_labels_embeddedornotcols_labels_embedded:
- G=eg.relabel_nodes(G,dict(zip(list(G.nodes),[i-1foriinG.nodes])))
-
- elifucinet_format=="edgelist1":
- G=eg.parse_edgelist(
- data_lines.splitlines(),
- nodetype=strifrow_labels_embeddedandcols_labels_embeddedelseint,
- create_using=eg.MultiDiGraph(),
- )
-
- ifnotrow_labels_embeddedornotcols_labels_embedded:
- G=eg.relabel_nodes(G,dict(zip(list(G.nodes),[i-1foriinG.nodes])))
-
- # Relabel nodes
- iflabels:
- try:
- iflen(list(G.nodes))<number_of_nodes:
- G.add_nodes_from(
- labels.values()iflabelselserange(0,number_of_nodes)
- )
- G=eg.relabel_nodes(G,labels)
- exceptKeyError:
- pass# Nodes already labelled
-
- returnG
-
-
-defget_param(regex,token,lines):
-"""
- Get a parameter value in UCINET DL file
- :param regex: string with the regex matching the parameter value
- :param token: token (string) in which we search for the parameter
- :param lines: to iterate through the next tokens
- :return:
- """
- n=token
- query=re.search(regex,n)
- whilequeryisNone:
- try:
- n=next(lines)
- exceptStopIteration:
- raiseException("Parameter %s value not recognized"%token)
- query=re.search(regex,n)
- returnquery.group()
-
[docs]defto_scipy_sparse_array(G,nodelist=None,dtype=None,weight="weight",format="csr"):
-"""Returns the graph adjacency matrix as a SciPy sparse array.
-
- Parameters
- ----------
- G : graph
- The EasyGraph graph used to construct the sparse matrix.
-
- nodelist : list, optional
- The rows and columns are ordered according to the nodes in `nodelist`.
- If `nodelist` is None, then the ordering is produced by G.nodes().
-
- dtype : NumPy data-type, optional
- A valid NumPy dtype used to initialize the array. If None, then the
- NumPy default is used.
-
- weight : string or None optional (default='weight')
- The edge attribute that holds the numerical value used for
- the edge weight. If None then all edge weights are 1.
-
- format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'}
- The type of the matrix to be returned (default 'csr'). For
- some algorithms different implementations of sparse matrices
- can perform better. See [1]_ for details.
-
- Returns
- -------
- A : SciPy sparse array
- Graph adjacency matrix.
-
- Notes
- -----
- For directed graphs, matrix entry i,j corresponds to an edge from i to j.
-
- The matrix entries are populated using the edge attribute held in
- parameter weight. When an edge does not have that attribute, the
- value of the entry is 1.
-
- For multiple edges the matrix values are the sums of the edge weights.
-
- When `nodelist` does not contain every node in `G`, the adjacency matrix
- is built from the subgraph of `G` that is induced by the nodes in
- `nodelist`.
-
- The convention used for self-loop edges in graphs is to assign the
- diagonal matrix entry value to the weight attribute of the edge
- (or the number 1 if the edge has no weight attribute). If the
- alternate convention of doubling the edge weight is desired the
- resulting Scipy sparse matrix can be modified as follows:
-
- >>> G = eg.Graph([(1, 1)])
- >>> A = eg.to_scipy_sparse_array(G)
- >>> print(A.todense())
- [[1]]
- >>> A.setdiag(A.diagonal() * 2)
- >>> print(A.toarray())
- [[2]]
-
- Examples
- --------
- >>> S = eg.to_scipy_sparse_array(G, nodelist=[0, 1, 2])
- >>> print(S.toarray())
- [[0 2 0]
- [1 0 0]
- [0 0 4]]
-
- References
- ----------
- .. [1] Scipy Dev. References, "Sparse Matrices",
- https://docs.scipy.org/doc/scipy/reference/sparse.html
- """
- importscipyassp
- importscipy.sparse# call as sp.sparse
-
- iflen(G)==0:
- raiseeg.EasyGraphError("Graph has no nodes or edges")
-
- ifnodelistisNone:
- nodelist=list(G)
- nlen=len(G)
- else:
- nlen=len(nodelist)
- ifnlen==0:
- raiseeg.EasyGraphError("nodelist has no nodes")
- nodeset=set(G.nbunch_iter(nodelist))
- ifnlen!=len(nodeset):
- forninnodelist:
- ifnnotinG:
- raiseeg.EasyGraphError(f"Node {n} in nodelist is not in G")
- raiseeg.EasyGraphError("nodelist contains duplicates.")
- ifnlen<len(G):
- G=G.subgraph(nodelist)
-
- index=dict(zip(nodelist,range(nlen)))
-
- # G.edges(data=weight, default=1)
-
- coefficients=zip(
- *((index[u],index[v],wt.get("weight",1))foru,v,wtinG.edges)
- )
- try:
- row,col,data=coefficients
- exceptValueError:
- # there is no edge in the subgraph
- row,col,data=[],[],[]
-
- ifG.is_directed():
- A=sp.sparse.coo_array((data,(row,col)),shape=(nlen,nlen),dtype=dtype)
- else:
- # symmetrize matrix
- d=data+data
- r=row+col
- c=col+row
- # selfloop entries get double counted when symmetrizing
- # so we subtract the data on the diagonal
- selfloops=list(eg.selfloop_edges(G,data=weight,default=1))
- ifselfloops:
- diag_index,diag_data=zip(*((index[u],-wt)foru,v,wtinselfloops))
- d+=diag_data
- r+=diag_index
- c+=diag_index
- A=sp.sparse.coo_array((d,(r,c)),shape=(nlen,nlen),dtype=dtype)
- try:
- returnA.asformat(format)
- exceptValueErroraserr:
- raiseeg.EasyGraphError(f"Unknown sparse matrix format: {format}")fromerr
-
-
-
[docs]defto_scipy_sparse_matrix(G,nodelist=None,dtype=None,weight="weight",format="csr"):
-"""Returns the graph adjacency matrix as a SciPy sparse matrix.
-
- Parameters
- ----------
- G : graph
- The EasyGraph graph used to construct the sparse matrix.
-
- nodelist : list, optional
- The rows and columns are ordered according to the nodes in `nodelist`.
- If `nodelist` is None, then the ordering is produced by G.nodes().
-
- dtype : NumPy data-type, optional
- A valid NumPy dtype used to initialize the array. If None, then the
- NumPy default is used.
-
- weight : string or None optional (default='weight')
- The edge attribute that holds the numerical value used for
- the edge weight. If None then all edge weights are 1.
-
- format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'}
- The type of the matrix to be returned (default 'csr'). For
- some algorithms different implementations of sparse matrices
- can perform better. See [1]_ for details.
-
- Returns
- -------
- A : SciPy sparse matrix
- Graph adjacency matrix.
-
- Notes
- -----
- For directed graphs, matrix entry i,j corresponds to an edge from i to j.
-
- The matrix entries are populated using the edge attribute held in
- parameter weight. When an edge does not have that attribute, the
- value of the entry is 1.
-
- For multiple edges the matrix values are the sums of the edge weights.
-
- When `nodelist` does not contain every node in `G`, the adjacency matrix
- is built from the subgraph of `G` that is induced by the nodes in
- `nodelist`.
-
- The convention used for self-loop edges in graphs is to assign the
- diagonal matrix entry value to the weight attribute of the edge
- (or the number 1 if the edge has no weight attribute). If the
- alternate convention of doubling the edge weight is desired the
- resulting Scipy sparse matrix can be modified as follows:
-
- >>> G = eg.Graph([(1, 1)])
- >>> A = eg.to_scipy_sparse_matrix(G)
- >>> print(A.todense())
- [[1]]
- >>> A.setdiag(A.diagonal() * 2)
- >>> print(A.todense())
- [[2]]
-
- Examples
- --------
-
- >>> G.add_edge(1, 0)
- 0
- >>> G.add_edge(2, 2, weight=3)
- 0
- >>> G.add_edge(2, 2)
- 1
- >>> S = eg.to_scipy_sparse_matrix(G, nodelist=[0, 1, 2])
- >>> print(S.todense())
- [[0 2 0]
- [1 0 0]
- [0 0 4]]
-
- References
- ----------
- .. [1] Scipy Dev. References, "Sparse Matrices",
- https://docs.scipy.org/doc/scipy/reference/sparse.html
- """
- importscipyassp
- importscipy.sparse
-
- A=to_scipy_sparse_array(
- G,nodelist=nodelist,dtype=dtype,weight=weight,format=format
- )
- returnsp.sparse.csr_matrix(A).asformat(format)
-
-
-
[docs]defto_numpy_matrix(G,edge_sign=1.0,not_edge_sign=0.0):
-"""
- Returns the graph adjacency matrix as a NumPy matrix.
-
- Parameters
- ----------
- edge_sign : float
- Sign for the position of matrix where there is an edge
-
- not_edge_sign : float
- Sign for the position of matrix where there is no edge
-
- """
- importnumpyasnp
-
- index_of_node=dict(zip(G.nodes,range(len(G))))
- N=len(G)
- M=np.full((N,N),not_edge_sign)
-
- foru,udictinG.adj.items():
- forv,datainudict.items():
- M[index_of_node[u],index_of_node[v]]=edge_sign
-
- M=np.asmatrix(M)
- returnM
-
-
-
[docs]deffrom_numpy_array(A,parallel_edges=False,create_using=None):
-"""Returns a graph from a 2D NumPy array.
-
- The 2D NumPy array is interpreted as an adjacency matrix for the graph.
-
- Parameters
- ----------
- A : a 2D numpy.ndarray
- An adjacency matrix representation of a graph
-
- parallel_edges : Boolean
- If this is True, `create_using` is a multigraph, and `A` is an
- integer array, then entry *(i, j)* in the array is interpreted as the
- number of parallel edges joining vertices *i* and *j* in the graph.
- If it is False, then the entries in the array are interpreted as
- the weight of a single edge joining the vertices.
-
- create_using : EasyGraph graph constructor, optional (default=eg.Graph)
- Graph type to create. If graph instance, then cleared before populated.
-
- Notes
- -----
- For directed graphs, explicitly mention create_using=eg.DiGraph,
- and entry i,j of A corresponds to an edge from i to j.
-
- If `create_using` is :class:`easygraph.MultiGraph` or
- :class:`easygraph.MultiDiGraph`, `parallel_edges` is True, and the
- entries of `A` are of type :class:`int`, then this function returns a
- multigraph (of the same type as `create_using`) with parallel edges.
-
- If `create_using` indicates an undirected multigraph, then only the edges
- indicated by the upper triangle of the array `A` will be added to the
- graph.
-
- If the NumPy array has a single data type for each array entry it
- will be converted to an appropriate Python data type.
-
- If the NumPy array has a user-specified compound data type the names
- of the data fields will be used as attribute keys in the resulting
- EasyGraph graph.
-
- See Also
- --------
- to_numpy_array
-
- Examples
- --------
- Simple integer weights on edges:
-
- >>> import numpy as np
- >>> A = np.array([[1, 1], [2, 1]])
- >>> G = eg.from_numpy_array(A)
- >>> G.edges(data=True)
- EdgeDataView([(0, 0, {'weight': 1}), (0, 1, {'weight': 2}), (1, 1, {'weight': 1})])
-
- If `create_using` indicates a multigraph and the array has only integer
- entries and `parallel_edges` is False, then the entries will be treated
- as weights for edges joining the nodes (without creating parallel edges):
-
- >>> A = np.array([[1, 1], [1, 2]])
- >>> G = eg.from_numpy_array(A, create_using=eg.MultiGraph)
- >>> G[1][1]
- AtlasView({0: {'weight': 2}})
-
- If `create_using` indicates a multigraph and the array has only integer
- entries and `parallel_edges` is True, then the entries will be treated
- as the number of parallel edges joining those two vertices:
-
- >>> A = np.array([[1, 1], [1, 2]])
- >>> temp = eg.MultiGraph()
- >>> G = eg.from_numpy_array(A, parallel_edges=True, create_using=temp)
- >>> G[1][1]
- AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
-
- User defined compound data type on edges:
-
- >>> dt = [("weight", float), ("cost", int)]
- >>> A = np.array([[(1.0, 2)]], dtype=dt)
- >>> G = eg.from_numpy_array(A)
- >>> G.edges()
- EdgeView([(0, 0)])
- >>> G[0][0]["cost"]
- 2
- >>> G[0][0]["weight"]
- 1.0
-
- """
- kind_to_python_type={
- "f":float,
- "i":int,
- "u":int,
- "b":bool,
- "c":complex,
- "S":str,
- "U":str,
- "V":"void",
- }
- G=eg.empty_graph(0,create_using)
- ifA.ndim!=2:
- raiseeg.EasyGraphError(f"Input array must be 2D, not {A.ndim}")
- n,m=A.shape
- ifn!=m:
- raiseeg.EasyGraphError(f"Adjacency matrix not square: eg,ny={A.shape}")
- dt=A.dtype
- try:
- python_type=kind_to_python_type[dt.kind]
- exceptExceptionaserr:
- raiseTypeError(f"Unknown numpy data type: {dt}")fromerr
-
- # Make sure we get even the isolated nodes of the graph.
- G.add_nodes_from(range(n))
- # Get a list of all the entries in the array with nonzero entries. These
- # coordinates become edges in the graph. (convert to int from np.int64)
- edges=((int(e[0]),int(e[1]))foreinzip(*A.nonzero()))
- # handle numpy constructed data type
- ifpython_type=="void":
- # Sort the fields by their offset, then by dtype, then by name.
- fields=sorted(
- (offset,dtype,name)forname,(dtype,offset)inA.dtype.fields.items()
- )
- triples=(
- (
- u,
- v,
- {
- name:kind_to_python_type[dtype.kind](val)
- for(_,dtype,name),valinzip(fields,A[u,v])
- },
- )
- foru,vinedges
- )
- # If the entries in the adjacency matrix are integers, the graph is a
- # multigraph, and parallel_edges is True, then create parallel edges, each
- # with weight 1, for each entry in the adjacency matrix. Otherwise, create
- # one edge for each positive entry in the adjacency matrix and set the
- # weight of that edge to be the entry in the matrix.
- elifpython_typeisintandG.is_multigraph()andparallel_edges:
- chain=itertools.chain.from_iterable
- # The following line is equivalent to:
- #
- # for (u, v) in edges:
- # for d in range(A[u, v]):
- # G.add_edge(u, v, weight=1)
- #
- triples=chain(
- ((u,v,{"weight":1})fordinrange(A[u,v]))for(u,v)inedges
- )
- else:# basic data type
- triples=((u,v,dict(weight=python_type(A[u,v])))foru,vinedges)
- # If we are creating an undirected multigraph, only add the edges from the
- # upper triangle of the matrix. Otherwise, add all the edges. This relies
- # on the fact that the vertices created in the
- # `_generated_weighted_edges()` function are actually the row/column
- # indices for the matrix `A`.
- #
- # Without this check, we run into a problem where each edge is added twice
- # when `G.add_edges_from()` is invoked below.
- ifG.is_multigraph()andnotG.is_directed():
- triples=((u,v,d)foru,v,dintriplesifu<=v)
- G.add_edges_from(triples)
- returnG
-
-
-
[docs]defto_numpy_array(
- G,
- nodelist=None,
- dtype=None,
- order=None,
- multigraph_weight=sum,
- weight="weight",
- nonedge=0.0,
-):
-"""Returns the graph adjacency matrix as a NumPy array.
-
- Parameters
- ----------
- G : graph
- The EasyGraph graph used to construct the NumPy array.
-
- nodelist : list, optional
- The rows and columns are ordered according to the nodes in `nodelist`.
- If `nodelist` is None, then the ordering is produced by G.nodes().
-
- dtype : NumPy data type, optional
- A valid single NumPy data type used to initialize the array.
- This must be a simple type such as int or numpy.float64 and
- not a compound data type (see to_numpy_recarray)
- If None, then the NumPy default is used.
-
- order : {'C', 'F'}, optional
- Whether to store multidimensional data in C- or Fortran-contiguous
- (row- or column-wise) order in memory. If None, then the NumPy default
- is used.
-
- multigraph_weight : {sum, min, max}, optional
- An operator that determines how weights in multigraphs are handled.
- The default is to sum the weights of the multiple edges.
-
- weight : string or None optional (default = 'weight')
- The edge attribute that holds the numerical value used for
- the edge weight. If an edge does not have that attribute, then the
- value 1 is used instead.
-
- nonedge : float (default = 0.0)
- The array values corresponding to nonedges are typically set to zero.
- However, this could be undesirable if there are array values
- corresponding to actual edges that also have the value zero. If so,
- one might prefer nonedges to have some other value, such as nan.
-
- Returns
- -------
- A : NumPy ndarray
- Graph adjacency matrix
-
- See Also
- --------
- from_numpy_array
-
- Notes
- -----
- For directed graphs, entry i,j corresponds to an edge from i to j.
-
- Entries in the adjacency matrix are assigned to the weight edge attribute.
- When an edge does not have a weight attribute, the value of the entry is
- set to the number 1. For multiple (parallel) edges, the values of the
- entries are determined by the `multigraph_weight` parameter. The default is
- to sum the weight attributes for each of the parallel edges.
-
- When `nodelist` does not contain every node in `G`, the adjacency matrix is
- built from the subgraph of `G` that is induced by the nodes in `nodelist`.
-
- The convention used for self-loop edges in graphs is to assign the
- diagonal array entry value to the weight attribute of the edge
- (or the number 1 if the edge has no weight attribute). If the
- alternate convention of doubling the edge weight is desired the
- resulting NumPy array can be modified as follows:
-
- >>> import numpy as np
- >>> G = eg.Graph([(1, 1)])
- >>> A = eg.to_numpy_array(G)
- >>> A
- array([[1.]])
- >>> A[np.diag_indices_from(A)] *= 2
- >>> A
- array([[2.]])
-
- Examples
- --------
- >>> G = eg.MultiDiGraph()
- >>> G.add_edge(0, 1, weight=2)
- 0
- >>> G.add_edge(1, 0)
- 0
- >>> G.add_edge(2, 2, weight=3)
- 0
- >>> G.add_edge(2, 2)
- 1
- >>> eg.to_numpy_array(G, nodelist=[0, 1, 2])
- array([[0., 2., 0.],
- [1., 0., 0.],
- [0., 0., 4.]])
-
- """
- importnumpyasnp
-
- ifnodelistisNone:
- nodelist=list(G)
- nodeset=G
- nlen=len(G)
- else:
- nlen=len(nodelist)
- nodeset=set(G.nodes)
- ifnlen!=len(nodeset):
- forninnodelist:
- ifnnotinG:
- raiseeg.EasyGraphError(f"Node {n} in nodelist is not in G")
- raiseeg.EasyGraphError("nodelist contains duplicates.")
-
- undirected=notG.is_directed()
- index=dict(zip(nodelist,range(nlen)))
-
- # Initially, we start with an array of nans. Then we populate the array
- # using data from the graph. Afterwards, any leftover nans will be
- # converted to the value of `nonedge`. Note, we use nans initially,
- # instead of zero, for two reasons:
- #
- # 1) It can be important to distinguish a real edge with the value 0
- # from a nonedge with the value 0.
- #
- # 2) When working with multi(di)graphs, we must combine the values of all
- # edges between any two nodes in some manner. This often takes the
- # form of a sum, min, or max. Using the value 0 for a nonedge would
- # have undesirable effects with min and max, but using nanmin and
- # nanmax with initially nan values is not problematic at all.
- #
- # That said, there are still some drawbacks to this approach. Namely, if
- # a real edge is nan, then that value is a) not distinguishable from
- # nonedges and b) is ignored by the default combinator (nansum, nanmin,
- # nanmax) functions used for multi(di)graphs. If this becomes an issue,
- # an alternative approach is to use masked arrays. Initially, every
- # element is masked and set to some `initial` value. As we populate the
- # graph, elements are unmasked (automatically) when we combine the initial
- # value with the values given by real edges. At the end, we convert all
- # masked values to `nonedge`. Using masked arrays fully addresses reason 1,
- # but for reason 2, we would still have the issue with min and max if the
- # initial values were 0.0. Note: an initial value of +inf is appropriate
- # for min, while an initial value of -inf is appropriate for max. When
- # working with sum, an initial value of zero is appropriate. Ideally then,
- # we'd want to allow users to specify both a value for nonedges and also
- # an initial value. For multi(di)graphs, the choice of the initial value
- # will, in general, depend on the combinator function---sensible defaults
- # can be provided.
-
- ifG.is_multigraph():
- # Handle MultiGraphs and MultiDiGraphs
- A=np.full((nlen,nlen),np.nan,order=order)
- # use numpy nan-aware operations
- operator={sum:np.nansum,min:np.nanmin,max:np.nanmax}
- try:
- op=operator[multigraph_weight]
- exceptExceptionaserr:
- raiseValueError("multigraph_weight must be sum, min, or max")fromerr
-
- foru,v,_,attrsinG.edges:
- if(uinnodeset)and(vinnodeset):
- i,j=index[u],index[v]
- e_weight=attrs.get(weight,1)
- A[i,j]=op([e_weight,A[i,j]])
- ifundirected:
- A[j,i]=A[i,j]
- else:
- # Graph or DiGraph, this is much faster than above
- A=np.full((nlen,nlen),np.nan,order=order)
- foru,nbrdictinG.adj.items():
- forv,dinnbrdict.items():
- try:
- A[index[u],index[v]]=d.get(weight,1)
- exceptKeyError:
- # This occurs when there are fewer desired nodes than
- # there are nodes in the graph: len(nodelist) < len(G)
- pass
-
- A[np.isnan(A)]=nonedge
- A=np.asarray(A,dtype=dtype)
- returnA
-
-
-
[docs]deffrom_pandas_adjacency(df,create_using=None):
-r"""Returns a graph from Pandas DataFrame.
-
- The Pandas DataFrame is interpreted as an adjacency matrix for the graph.
-
- Parameters
- ----------
- df : Pandas DataFrame
- An adjacency matrix representation of a graph
-
- create_using : EasyGraph graph constructor, optional (default=eg.Graph)
- Graph type to create. If graph instance, then cleared before populated.
-
- Notes
- -----
- For directed graphs, explicitly mention create_using=eg.DiGraph,
- and entry i,j of df corresponds to an edge from i to j.
-
- If `df` has a single data type for each entry it will be converted to an
- appropriate Python data type.
-
- If `df` has a user-specified compound data type the names
- of the data fields will be used as attribute keys in the resulting
- EasyGraph graph.
-
- See Also
- --------
- to_pandas_adjacency
-
- Examples
- --------
- Simple integer weights on edges:
-
- >>> import pandas as pd
- >>> pd.options.display.max_columns = 20
- >>> df = pd.DataFrame([[1, 1], [2, 1]])
- >>> df
- 0 1
- 0 1 1
- 1 2 1
- >>> G = eg.from_pandas_adjacency(df)
- >>> G.name = "Graph from pandas adjacency matrix"
- """
-
- try:
- df=df[df.index]
- exceptExceptionaserr:
- missing=list(set(df.index).difference(set(df.columns)))
- msg=f"{missing} not in columns"
- raiseeg.EasyGraphError("Columns must match Indices.",msg)fromerr
-
- A=df.values
- G=from_numpy_array(A,create_using=create_using)
-
- G=eg.relabel_nodes(G,dict(enumerate(df.columns)))
- returnG
-
-
-
[docs]deffrom_pandas_edgelist(
- df,
- source="source",
- target="target",
- edge_attr=None,
- create_using=None,
- edge_key=None,
-):
-"""Returns a graph from Pandas DataFrame containing an edge list.
-
- The Pandas DataFrame should contain at least two columns of node names and
- zero or more columns of edge attributes. Each row will be processed as one
- edge instance.
-
- Note: This function iterates over DataFrame.values, which is not
- guaranteed to retain the data type across columns in the row. This is only
- a problem if your row is entirely numeric and a mix of ints and floats. In
- that case, all values will be returned as floats. See the
- DataFrame.iterrows documentation for an example.
-
- Parameters
- ----------
- df : Pandas DataFrame
- An edge list representation of a graph
-
- source : str or int
- A valid column name (string or integer) for the source nodes (for the
- directed case).
-
- target : str or int
- A valid column name (string or integer) for the target nodes (for the
- directed case).
-
- edge_attr : str or int, iterable, True, or None
- A valid column name (str or int) or iterable of column names that are
- used to retrieve items and add them to the graph as edge attributes.
- If `True`, all of the remaining columns will be added.
- If `None`, no edge attributes are added to the graph.
-
- create_using : EasyGraph graph constructor, optional (default=eg.Graph)
- Graph type to create. If graph instance, then cleared before populated.
-
- edge_key : str or None, optional (default=None)
- A valid column name for the edge keys (for a MultiGraph). The values in
- this column are used for the edge keys when adding edges if create_using
- is a multigraph.
-
- See Also
- --------
- to_pandas_edgelist
-
- Examples
- --------
- Simple integer weights on edges:
-
- >>> import pandas as pd
- >>> pd.options.display.max_columns = 20
- >>> import numpy as np
- >>> rng = np.random.RandomState(seed=5)
- >>> ints = rng.randint(1, 11, size=(3, 2))
- >>> a = ["A", "B", "C"]
- >>> b = ["D", "A", "E"]
- >>> df = pd.DataFrame(ints, columns=["weight", "cost"])
- >>> df[0] = a
- >>> df["b"] = b
- >>> df[["weight", "cost", 0, "b"]]
- weight cost 0 b
- 0 4 7 A D
- 1 7 1 B A
- 2 10 9 C E
- >>> G = eg.from_pandas_edgelist(df, 0, "b", ["weight", "cost"])
- >>> G["E"]["C"]["weight"]
- 10
- >>> G["E"]["C"]["cost"]
- 9
- >>> edges = pd.DataFrame(
- ... {
- ... "source": [0, 1, 2],
- ... "target": [2, 2, 3],
- ... "weight": [3, 4, 5],
- ... "color": ["red", "blue", "blue"],
- ... }
- ... )
- >>> G = eg.from_pandas_edgelist(edges, edge_attr=True)
- >>> G[0][2]["color"]
- 'red'
-
- Build multigraph with custom keys:
-
- >>> edges = pd.DataFrame(
- ... {
- ... "source": [0, 1, 2, 0],
- ... "target": [2, 2, 3, 2],
- ... "my_edge_key": ["A", "B", "C", "D"],
- ... "weight": [3, 4, 5, 6],
- ... "color": ["red", "blue", "blue", "blue"],
- ... }
- ... )
- >>> G = eg.from_pandas_edgelist(
- ... edges,
- ... edge_key="my_edge_key",
- ... edge_attr=["weight", "color"],
- ... create_using=eg.MultiGraph(),
- ... )
- >>> G[0][2]
- AtlasView({'A': {'weight': 3, 'color': 'red'}, 'D': {'weight': 6, 'color': 'blue'}})
-
-
- """
- g=eg.empty_graph(0,create_using)
-
- ifedge_attrisNone:
- g.add_edges_from(zip(df[source],df[target]))
- returng
-
- reserved_columns=[source,target]
-
- # Additional columns requested
- attr_col_headings=[]
- attribute_data=[]
- ifedge_attrisTrue:
- attr_col_headings=[cforcindf.columnsifcnotinreserved_columns]
- elifisinstance(edge_attr,(list,tuple)):
- attr_col_headings=edge_attr
- else:
- attr_col_headings=[edge_attr]
- iflen(attr_col_headings)==0:
- raiseeg.EasyGraphError(
- "Invalid edge_attr argument: No columns found with name:"
- f" {attr_col_headings}"
- )
-
- try:
- attribute_data=zip(*[df[col]forcolinattr_col_headings])
- except(KeyError,TypeError)aserr:
- msg=f"Invalid edge_attr argument: {edge_attr}"
- raiseeg.EasyGraphError(msg)fromerr
-
- ifg.is_multigraph():
- # => append the edge keys from the df to the bundled data
- ifedge_keyisnotNone:
- try:
- multigraph_edge_keys=df[edge_key]
- attribute_data=zip(attribute_data,multigraph_edge_keys)
- except(KeyError,TypeError)aserr:
- msg=f"Invalid edge_key argument: {edge_key}"
- raiseeg.EasyGraphError(msg)fromerr
-
- fors,t,attrsinzip(df[source],df[target],attribute_data):
- ifedge_keyisnotNone:
- attrs,multigraph_edge_key=attrs
- key=g.add_edge(s,t,key=multigraph_edge_key)
- else:
- key=g.add_edge(s,t)
-
- g[s][t][key].update(zip(attr_col_headings,attrs))
- else:
- fors,t,attrsinzip(df[source],df[target],attribute_data):
- g.add_edge(s,t)
- g[s][t].update(zip(attr_col_headings,attrs))
-
- returng
-
-
-
[docs]deffrom_scipy_sparse_matrix(
- A,parallel_edges=False,create_using=None,edge_attribute="weight"
-):
-"""Creates a new graph from an adjacency matrix given as a SciPy sparse
- matrix.
-
- Parameters
- ----------
- A: scipy sparse matrix
- An adjacency matrix representation of a graph
-
- parallel_edges : Boolean
- If this is True, `create_using` is a multigraph, and `A` is an
- integer matrix, then entry *(i, j)* in the matrix is interpreted as the
- number of parallel edges joining vertices *i* and *j* in the graph.
- If it is False, then the entries in the matrix are interpreted as
- the weight of a single edge joining the vertices.
-
- create_using : EasyGraph graph constructor, optional (default=eg.Graph)
- Graph type to create. If graph instance, then cleared before populated.
-
- edge_attribute: string
- Name of edge attribute to store matrix numeric value. The data will
- have the same type as the matrix entry (int, float, (real,imag)).
-
- Notes
- -----
- For directed graphs, explicitly mention create_using=eg.DiGraph,
- and entry i,j of A corresponds to an edge from i to j.
-
- If `create_using` is :class:`easygraph.MultiGraph` or
- :class:`easygraph.MultiDiGraph`, `parallel_edges` is True, and the
- entries of `A` are of type :class:`int`, then this function returns a
- multigraph (constructed from `create_using`) with parallel edges.
- In this case, `edge_attribute` will be ignored.
-
- If `create_using` indicates an undirected multigraph, then only the edges
- indicated by the upper triangle of the matrix `A` will be added to the
- graph.
-
- Examples
- --------
- >>> import scipy as sp
- >>> import scipy.sparse # call as sp.sparse
- >>> A = sp.sparse.eye(2, 2, 1)
- >>> G = eg.from_scipy_sparse_matrix(A)
-
- If `create_using` indicates a multigraph and the matrix has only integer
- entries and `parallel_edges` is Falnxse, then the entries will be treated
- as weights for edges joining the nodes (without creating parallel edges):
-
- >>> A = sp.sparse.csr_matrix([[1, 1], [1, 2]])
- >>> G = eg.from_scipy_sparse_matrix(A, create_using=eg.MultiGraph)
- >>> G[1][1]
- AtlasView({0: {'weight': 2}})
-
- If `create_using` indicates a multigraph and the matrix has only integer
- entries and `parallel_edges` is True, then the entries will be treated
- as the number of parallel edges joining those two vertices:
-
- >>> A = sp.sparse.csr_matrix([[1, 1], [1, 2]])
- >>> G = eg.from_scipy_sparse_matrix(
- ... A, parallel_edges=True, create_using=eg.MultiGraph
- ... )
- >>> G[1][1]
- AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
-
- """
-
- returnfrom_scipy_sparse_array(
- A,
- parallel_edges=parallel_edges,
- create_using=create_using,
- edge_attribute=edge_attribute,
- )
-
-
-deffrom_scipy_sparse_array(
- A,parallel_edges=False,create_using=None,edge_attribute="weight"
-):
- G=eg.empty_graph(0,create_using)
- n,m=A.shape
- ifn!=m:
- raiseeg.EasyGraphError(f"Adjacency matrix not square: nx,ny={A.shape}")
- # Make sure we get even the isolated nodes of the graph.
- G.add_nodes_from(range(n))
- # Create an iterable over (u, v, w) triples and for each triple, add an
- # edge from u to v with weight w.
- triples=_generate_weighted_edges(A)
- # If the entries in the adjacency matrix are integers, the graph is a
- # multigraph, and parallel_edges is True, then create parallel edges, each
- # with weight 1, for each entry in the adjacency matrix. Otherwise, create
- # one edge for each positive entry in the adjacency matrix and set the
- # weight of that edge to be the entry in the matrix.
- ifA.dtype.kindin("i","u")andG.is_multigraph()andparallel_edges:
- chain=itertools.chain.from_iterable
- # The following line is equivalent to:
- #
- # for (u, v) in edges:
- # for d in range(A[u, v]):
- # G.add_edge(u, v, weight=1)
- #
- triples=chain(((u,v,1)fordinrange(w))for(u,v,w)intriples)
- # If we are creating an undirected multigraph, only add the edges from the
- # upper triangle of the matrix. Otherwise, add all the edges. This relies
- # on the fact that the vertices created in the
- # `_generated_weighted_edges()` function are actually the row/column
- # indices for the matrix `A`.
- #
- # Without this check, we run into a problem where each edge is added twice
- # when `G.add_weighted_edges_from()` is invoked below.
- ifG.is_multigraph()andnotG.is_directed():
- triples=((u,v,d)foru,v,dintriplesifu<=v)
- G.add_edges_from(((u,v,{"weight":d})foru,v,dintriples))
- returnG
-
-
-def_generate_weighted_edges(A):
-"""Returns an iterable over (u, v, w) triples, where u and v are adjacent
- vertices and w is the weight of the edge joining u and v.
-
- `A` is a SciPy sparse matrix (in any format).
-
- """
- ifA.format=="csr":
- return_csr_gen_triples(A)
- ifA.format=="csc":
- return_csc_gen_triples(A)
- ifA.format=="dok":
- return_dok_gen_triples(A)
- # If A is in any other format (including COO), convert it to COO format.
- return_coo_gen_triples(A.tocoo())
-
-
-def_csr_gen_triples(A):
-"""Converts a SciPy sparse matrix in **Compressed Sparse Row** format to
- an iterable of weighted edge triples.
-
- """
- nrows=A.shape[0]
- data,indices,indptr=A.data,A.indices,A.indptr
- foriinrange(nrows):
- forjinrange(indptr[i],indptr[i+1]):
- yieldi,indices[j],data[j]
-
-
-def_csc_gen_triples(A):
-"""Converts a SciPy sparse matrix in **Compressed Sparse Column** format to
- an iterable of weighted edge triples.
-
- """
- ncols=A.shape[1]
- data,indices,indptr=A.data,A.indices,A.indptr
- foriinrange(ncols):
- forjinrange(indptr[i],indptr[i+1]):
- yieldindices[j],i,data[j]
-
-
-def_coo_gen_triples(A):
-"""Converts a SciPy sparse matrix in **Coordinate** format to an iterable
- of weighted edge triples.
-
- """
- row,col,data=A.row,A.col,A.data
- returnzip(row,col,data)
-
-
-def_dok_gen_triples(A):
-"""Converts a SciPy sparse matrix in **Dictionary of Keys** format to an
- iterable of weighted edge triples.
-
- """
- for(r,c),vinA.items():
- yieldr,c,v
-
[docs]defretry_method_with_fix(fix_method):
-"""Decorator that executes a fix method before retrying again when the decorated method
- fails once with any exception.
-
- If the decorated method fails again, the execution fails with that exception.
-
- Notes
- -----
- This decorator only works on class methods, and the fix function must also be a class method.
- It would not work on functions.
-
- Parameters
- ----------
- fix_func : callable
- The fix method to execute. It should not accept any arguments. Its return values are
- ignored.
- """
-
- def_creator(func):
- @wraps(func)
- defwrapper(self,*args,**kwargs):
- # pylint: disable=W0703,bare-except
- try:
- returnfunc(self,*args,**kwargs)
- except:
- fix_method(self)
- returnfunc(self,*args,**kwargs)
-
- returnwrapper
-
- return_creator
-
-
-
[docs]defonly_implemented_for_UnDirected_graph(func):
- # print("--------{:<40}: Only Implemented For UnDirected Graph--------".format(func.__name__))
- returnfunc
-
-
-
[docs]defonly_implemented_for_Directed_graph(func):
- # print("--------{:<40}: Only Implemented For Directed Graph--------".format(func.__name__))
- returnfunc
-
-
-
[docs]defnot_implemented_for(*graph_types):
-"""Decorator to mark algorithms as not implemented
-
- Parameters
- ----------
- graph_types : container of strings
- Entries must be one of "directed", "undirected", "multigraph", or "graph".
-
- Returns
- -------
- _require : function
- The decorated function.
-
- Raises
- ------
- EasyGraphNotImplemented
- If any of the packages cannot be imported
-
- Notes
- -----
- Multiple types are joined logically with "and".
- For "or" use multiple @not_implemented_for() lines.
-
- Examples
- --------
- Decorate functions like this::
-
- @not_implemented_for("directed")
- def sp_function(G):
- pass
-
- # rule out MultiDiGraph
- @not_implemented_for("directed","multigraph")
- def sp_np_function(G):
- pass
-
- # rule out all except DiGraph
- @not_implemented_for("undirected")
- @not_implemented_for("multigraph")
- def sp_np_function(G):
- pass
- """
- if("directed"ingraph_types)and("undirected"ingraph_types):
- raiseValueError("Function not implemented on directed AND undirected graphs?")
- if("multigraph"ingraph_types)and("graph"ingraph_types):
- raiseValueError("Function not implemented on graph AND multigraphs?")
- ifnotset(graph_types)<{"directed","undirected","multigraph","graph"}:
- raiseKeyError(
- "use one or more of directed, undirected, multigraph, graph. "
- f"You used {graph_types}"
- )
-
- # 3-way logic: True if "directed" input, False if "undirected" input, else None
- dval=("directed"ingraph_types)ornot("undirected"ingraph_types)andNone
- mval=("multigraph"ingraph_types)ornot("graph"ingraph_types)andNone
- errmsg=f"not implemented for {' '.join(graph_types)} type"
-
- def_not_implemented_for(g):
- if(mvalisNoneormval==g.is_multigraph())and(
- dvalisNoneordval==g.is_directed()
- ):
- raiseeg.EasyGraphNotImplemented(errmsg)
-
- returng
-
- returnargmap(_not_implemented_for,0)
-
-
-# To handle new extensions, define a function accepting a `path` and `mode`.
-# Then add the extension to _dispatch_dict.
-fopeners={
- ".gz":gzip.open,
- ".gzip":gzip.open,
- ".bz2":bz2.BZ2File,
-}
-_dispatch_dict=defaultdict(lambda:open,**fopeners)# type: ignore
-
-
-
[docs]defopen_file(path_arg,mode="r"):
-"""Decorator to ensure clean opening and closing of files.
-
- Parameters
- ----------
- path_arg : string or int
- Name or index of the argument that is a path.
-
- mode : str
- String for opening mode.
-
- Returns
- -------
- _open_file : function
- Function which cleanly executes the io.
-
- Examples
- --------
- Decorate functions like this::
-
- @open_file(0,"r")
- def read_function(pathname):
- pass
-
- @open_file(1,"w")
- def write_function(G, pathname):
- pass
-
- @open_file(1,"w")
- def write_function(G, pathname="graph.dot"):
- pass
-
- @open_file("pathname","w")
- def write_function(G, pathname="graph.dot"):
- pass
-
- @open_file("path", "w+")
- def another_function(arg, **kwargs):
- path = kwargs["path"]
- pass
-
- Notes
- -----
- Note that this decorator solves the problem when a path argument is
- specified as a string, but it does not handle the situation when the
- function wants to accept a default of None (and then handle it).
-
- Here is an example of how to handle this case::
-
- @open_file("path")
- def some_function(arg1, arg2, path=None):
- if path is None:
- fobj = tempfile.NamedTemporaryFile(delete=False)
- else:
- # `path` could have been a string or file object or something
- # similar. In any event, the decorator has given us a file object
- # and it will close it for us, if it should.
- fobj = path
-
- try:
- fobj.write("blah")
- finally:
- if path is None:
- fobj.close()
-
- Normally, we'd want to use "with" to ensure that fobj gets closed.
- However, the decorator will make `path` a file object for us,
- and using "with" would undesirably close that file object.
- Instead, we use a try block, as shown above.
- When we exit the function, fobj will be closed, if it should be, by the decorator.
- """
-
- def_open_file(path):
- # Now we have the path_arg. There are two types of input to consider:
- # 1) string representing a path that should be opened
- # 2) an already opened file object
- ifisinstance(path,str):
- ext=splitext(path)[1]
- elifisinstance(path,Path):
- # path is a pathlib reference to a filename
- ext=path.suffix
- path=str(path)
- else:
- # could be None, or a file handle, in which case the algorithm will deal with it
- returnpath,lambda:None
-
- fobj=_dispatch_dict[ext](path,mode=mode)
- returnfobj,lambda:fobj.close()
-
- returnargmap(_open_file,path_arg,try_finally=True)
-
-
-classargmap:
-"""A decorator to apply a map to arguments before calling the function
-
- This class provides a decorator that maps (transforms) arguments of the function
- before the function is called. Thus for example, we have similar code
- in many functions to determine whether an argument is the number of nodes
- to be created, or a list of nodes to be handled. The decorator provides
- the code to accept either -- transforming the indicated argument into a
- list of nodes before the actual function is called.
-
- This decorator class allows us to process single or multiple arguments.
- The arguments to be processed can be specified by string, naming the argument,
- or by index, specifying the item in the args list.
-
- Parameters
- ----------
- func : callable
- The function to apply to arguments
-
- *args : iterable of (int, str or tuple)
- A list of parameters, specified either as strings (their names), ints
- (numerical indices) or tuples, which may contain ints, strings, and
- (recursively) tuples. Each indicates which parameters the decorator
- should map. Tuples indicate that the map function takes (and returns)
- multiple parameters in the same order and nested structure as indicated
- here.
-
- try_finally : bool (default: False)
- When True, wrap the function call in a try-finally block with code
- for the finally block created by `func`. This is used when the map
- function constructs an object (like a file handle) that requires
- post-processing (like closing).
-
- Examples
- --------
- Most of these examples use `@argmap(...)` to apply the decorator to
- the function defined on the next line.
- In the EasyGraph codebase however, `argmap` is used within a function to
- construct a decorator. That is, the decorator defines a mapping function
- and then uses `argmap` to build and return a decorated function.
- A simple example is a decorator that specifies which currency to report money.
- The decorator (named `convert_to`) would be used like::
-
- @convert_to("US_Dollars", "income")
- def show_me_the_money(name, income):
- print(f"{name} : {income}")
-
- And the code to create the decorator might be::
-
- def convert_to(currency, which_arg):
- def _convert(amount):
- if amount.currency != currency:
- amount = amount.to_currency(currency)
- return amount
- return argmap(_convert, which_arg)
-
- Despite this common idiom for argmap, most of the following examples
- use the `@argmap(...)` idiom to save space.
-
- Here's an example use of argmap to sum the elements of two of the functions
- arguments. The decorated function::
-
- @argmap(sum, "xlist", "zlist")
- def foo(xlist, y, zlist):
- return xlist - y + zlist
-
- is syntactic sugar for::
-
- def foo(xlist, y, zlist):
- x = sum(xlist)
- z = sum(zlist)
- return x - y + z
-
- and is equivalent to (using argument indexes)::
-
- @argmap(sum, "xlist", 2)
- def foo(xlist, y, zlist):
- return xlist - y + zlist
-
- or::
-
- @argmap(sum, "zlist", 0)
- def foo(xlist, y, zlist):
- return xlist - y + zlist
-
- Transforming functions can be applied to multiple arguments, such as::
-
- def swap(x, y):
- return y, x
-
- # the 2-tuple tells argmap that the map `swap` has 2 inputs/outputs.
- @argmap(swap, ("a", "b")):
- def foo(a, b, c):
- return a / b * c
-
- is equivalent to::
-
- def foo(a, b, c):
- a, b = swap(a, b)
- return a / b * c
-
- More generally, the applied arguments can be nested tuples of strings or ints.
- The syntax `@argmap(some_func, ("a", ("b", "c")))` would expect `some_func` to
- accept 2 inputs with the second expected to be a 2-tuple. It should then return
- 2 outputs with the second a 2-tuple. The returns values would replace input "a"
- "b" and "c" respectively. Similarly for `@argmap(some_func, (0, ("b", 2)))`.
-
- Also, note that an index larger than the number of named parameters is allowed
- for variadic functions. For example::
-
- def double(a):
- return 2 * a
-
- @argmap(double, 3)
- def overflow(a, *args):
- return a, args
-
- print(overflow(1, 2, 3, 4, 5, 6)) # output is 1, (2, 3, 8, 5, 6)
-
- **Try Finally**
-
- Additionally, this `argmap` class can be used to create a decorator that
- initiates a try...finally block. The decorator must be written to return
- both the transformed argument and a closing function.
- This feature was included to enable the `open_file` decorator which might
- need to close the file or not depending on whether it had to open that file.
- This feature uses the keyword-only `try_finally` argument to `@argmap`.
-
- For example this map opens a file and then makes sure it is closed::
-
- def open_file(fn):
- f = open(fn)
- return f, lambda: f.close()
-
- The decorator applies that to the function `foo`::
-
- @argmap(open_file, "file", try_finally=True)
- def foo(file):
- print(file.read())
-
- is syntactic sugar for::
-
- def foo(file):
- file, close_file = open_file(file)
- try:
- print(file.read())
- finally:
- close_file()
-
- and is equivalent to (using indexes)::
-
- @argmap(open_file, 0, try_finally=True)
- def foo(file):
- print(file.read())
-
- Here's an example of the try_finally feature used to create a decorator::
-
- def my_closing_decorator(which_arg):
- def _opener(path):
- if path is None:
- path = open(path)
- fclose = path.close
- else:
- # assume `path` handles the closing
- fclose = lambda: None
- return path, fclose
- return argmap(_opener, which_arg, try_finally=True)
-
- which can then be used as::
-
- @my_closing_decorator("file")
- def fancy_reader(file=None):
- # this code doesn't need to worry about closing the file
- print(file.read())
-
- Notes
- -----
- An object of this class is callable and intended to be used when
- defining a decorator. Generally, a decorator takes a function as input
- and constructs a function as output. Specifically, an `argmap` object
- returns the input function decorated/wrapped so that specified arguments
- are mapped (transformed) to new values before the decorated function is called.
-
- As an overview, the argmap object returns a new function with all the
- dunder values of the original function (like `__doc__`, `__name__`, etc).
- Code for this decorated function is built based on the original function's
- signature. It starts by mapping the input arguments to potentially new
- values. Then it calls the decorated function with these new values in place
- of the indicated arguments that have been mapped. The return value of the
- original function is then returned. This new function is the function that
- is actually called by the user.
-
- Three additional features are provided.
- 1) The code is lazily compiled. That is, the new function is returned
- as an object without the code compiled, but with all information
- needed so it can be compiled upon it's first invocation. This saves
- time on import at the cost of additional time on the first call of
- the function. Subsequent calls are then just as fast as normal.
-
- 2) If the "try_finally" keyword-only argument is True, a try block
- follows each mapped argument, matched on the other side of the wrapped
- call, by a finally block closing that mapping. We expect func to return
- a 2-tuple: the mapped value and a function to be called in the finally
- clause. This feature was included so the `open_file` decorator could
- provide a file handle to the decorated function and close the file handle
- after the function call. It even keeps track of whether to close the file
- handle or not based on whether it had to open the file or the input was
- already open. So, the decorated function does not need to include any
- code to open or close files.
-
- 3) The maps applied can process multiple arguments. For example,
- you could swap two arguments using a mapping, or transform
- them to their sum and their difference. This was included to allow
- a decorator in the `quality.py` module that checks that an input
- `partition` is a valid partition of the nodes of the input graph `G`.
- In this example, the map has inputs `(G, partition)`. After checking
- for a valid partition, the map either raises an exception or leaves
- the inputs unchanged. Thus many functions that make this check can
- use the decorator rather than copy the checking code into each function.
- More complicated nested argument structures are described below.
-
- The remaining notes describe the code structure and methods for this
- class in broad terms to aid in understanding how to use it.
-
- Instantiating an `argmap` object simply stores the mapping function and
- the input identifiers of which arguments to map. The resulting decorator
- is ready to use this map to decorate any function. Calling that object
- (`argmap.__call__`, but usually done via `@my_decorator`) a lazily
- compiled thin wrapper of the decorated function is constructed,
- wrapped with the necessary function dunder attributes like `__doc__`
- and `__name__`. That thinly wrapped function is returned as the
- decorated function. When that decorated function is called, the thin
- wrapper of code calls `argmap._lazy_compile` which compiles the decorated
- function (using `argmap.compile`) and replaces the code of the thin
- wrapper with the newly compiled code. This saves the compilation step
- every import of easygraph, at the cost of compiling upon the first call
- to the decorated function.
-
- When the decorated function is compiled, the code is recursively assembled
- using the `argmap.assemble` method. The recursive nature is needed in
- case of nested decorators. The result of the assembly is a number of
- useful objects.
-
- sig : the function signature of the original decorated function as
- constructed by :func:`argmap.signature`. This is constructed
- using `inspect.signature` but enhanced with attribute
- strings `sig_def` and `sig_call`, and other information
- specific to mapping arguments of this function.
- This information is used to construct a string of code defining
- the new decorated function.
-
- wrapped_name : a unique internally used name constructed by argmap
- for the decorated function.
-
- functions : a dict of the functions used inside the code of this
- decorated function, to be used as `globals` in `exec`.
- This dict is recursively updated to allow for nested decorating.
-
- mapblock : code (as a list of strings) to map the incoming argument
- values to their mapped values.
-
- finallys : code (as a list of strings) to provide the possibly nested
- set of finally clauses if needed.
-
- mutable_args : a bool indicating whether the `sig.args` tuple should be
- converted to a list so mutation can occur.
-
- After this recursive assembly process, the `argmap.compile` method
- constructs code (as strings) to convert the tuple `sig.args` to a list
- if needed. It joins the defining code with appropriate indents and
- compiles the result. Finally, this code is evaluated and the original
- wrapper's implementation is replaced with the compiled version (see
- `argmap._lazy_compile` for more details).
-
- Other `argmap` methods include `_name` and `_count` which allow internally
- generated names to be unique within a python session.
- The methods `_flatten` and `_indent` process the nested lists of strings
- into properly indented python code ready to be compiled.
-
- More complicated nested tuples of arguments also allowed though
- usually not used. For the simple 2 argument case, the argmap
- input ("a", "b") implies the mapping function will take 2 arguments
- and return a 2-tuple of mapped values. A more complicated example
- with argmap input `("a", ("b", "c"))` requires the mapping function
- take 2 inputs, with the second being a 2-tuple. It then must output
- the 3 mapped values in the same nested structure `(newa, (newb, newc))`.
- This level of generality is not often needed, but was convenient
- to implement when handling the multiple arguments.
-
- See Also
- --------
- not_implemented_for
- open_file
- nodes_or_number
- random_state
- py_random_state
- easygraph.community.quality.require_partition
- require_partition
-
- """
-
- def__init__(self,func,*args,try_finally=False):
- self._func=func
- self._args=args
- self._finally=try_finally
-
- @staticmethod
- def_lazy_compile(func):
-"""Compile the source of a wrapped function
-
- Assemble and compile the decorated function, and intrusively replace its
- code with the compiled version's. The thinly wrapped function becomes
- the decorated function.
-
- Parameters
- ----------
- func : callable
- A function returned by argmap.__call__ which is in the process
- of being called for the first time.
-
- Returns
- -------
- func : callable
- The same function, with a new __code__ object.
-
- Notes
- -----
- It was observed in easygraph issue #4732 [1] that the import time of
- easygraph was significantly bloated by the use of decorators: over half
- of the import time was being spent decorating functions. This was
- somewhat improved by a change made to the `decorator` library, at the
- cost of a relatively heavy-weight call to `inspect.Signature.bind`
- for each call to the decorated function.
-
- The workaround we arrived at is to do minimal work at the time of
- decoration. When the decorated function is called for the first time,
- we compile a function with the same function signature as the wrapped
- function. The resulting decorated function is faster than one made by
- the `decorator` library, so that the overhead of the first call is
- 'paid off' after a small number of calls.
- """
- real_func=func.__argmap__.compile(func.__wrapped__)
- func.__code__=real_func.__code__
- func.__globals__.update(real_func.__globals__)
- func.__dict__.update(real_func.__dict__)
- returnfunc
-
- def__call__(self,f):
-"""Construct a lazily decorated wrapper of f.
-
- The decorated function will be compiled when it is called for the first time,
- and it will replace its own __code__ object so subsequent calls are fast.
-
- Parameters
- ----------
- f : callable
- A function to be decorated.
-
- Returns
- -------
- func : callable
- The decorated function.
-
- See Also
- --------
- argmap._lazy_compile
- """
-
- ifinspect.isgeneratorfunction(f):
-
- deffunc(*args,__wrapper=None,**kwargs):
- yield fromargmap._lazy_compile(__wrapper)(*args,**kwargs)
-
- else:
-
- deffunc(*args,__wrapper=None,**kwargs):
- returnargmap._lazy_compile(__wrapper)(*args,**kwargs)
-
- # standard function-wrapping stuff
- func.__name__=f.__name__
- func.__doc__=f.__doc__
- func.__defaults__=f.__defaults__
- func.__kwdefaults__.update(f.__kwdefaults__or{})
- func.__module__=f.__module__
- func.__qualname__=f.__qualname__
- func.__dict__.update(f.__dict__)
- func.__wrapped__=f
-
- # now that we've wrapped f, we may have picked up some __dict__ or
- # __kwdefaults__ items that were set by a previous argmap. Thus, we set
- # these values after those update() calls.
-
- # If we attempt to access func from within itself, that happens through
- # a closure -- which trips an error when we replace func.__code__. The
- # standard workaround for functions which can't see themselves is to use
- # a Y-combinator, as we do here.
- func.__kwdefaults__["_argmap__wrapper"]=func
-
- # this self-reference is here because functools.wraps preserves
- # everything in __dict__, and we don't want to mistake a non-argmap
- # wrapper for an argmap wrapper
- func.__self__=func
-
- # this is used to variously call self.assemble and self.compile
- func.__argmap__=self
-
- returnfunc
-
- __count=0
-
- @classmethod
- def_count(cls):
-"""Maintain a globally-unique identifier for function names and "file" names
-
- Note that this counter is a class method reporting a class variable
- so the count is unique within a Python session. It could differ from
- session to session for a specific decorator depending on the order
- that the decorators are created. But that doesn't disrupt `argmap`.
-
- This is used in two places: to construct unique variable names
- in the `_name` method and to construct unique fictitious filenames
- in the `_compile` method.
-
- Returns
- -------
- count : int
- An integer unique to this Python session (simply counts from zero)
- """
- cls.__count+=1
- returncls.__count
-
- _bad_chars=re.compile("[^a-zA-Z0-9_]")
-
- @classmethod
- def_name(cls,f):
-"""Mangle the name of a function to be unique but somewhat human-readable
-
- The names are unique within a Python session and set using `_count`.
-
- Parameters
- ----------
- f : str or object
-
- Returns
- -------
- name : str
- The mangled version of `f.__name__` (if `f.__name__` exists) or `f`
-
- """
- f=f.__name__ifhasattr(f,"__name__")elsef
- fname=re.sub(cls._bad_chars,"_",f)
- returnf"argmap_{fname}_{cls._count()}"
-
- defcompile(self,f):
-"""Compile the decorated function.
-
- Called once for a given decorated function -- collects the code from all
- argmap decorators in the stack, and compiles the decorated function.
-
- Much of the work done here uses the `assemble` method to allow recursive
- treatment of multiple argmap decorators on a single decorated function.
- That flattens the argmap decorators, collects the source code to construct
- a single decorated function, then compiles/executes/returns that function.
-
- The source code for the decorated function is stored as an attribute
- `_code` on the function object itself.
-
- Note that Python's `compile` function requires a filename, but this
- code is constructed without a file, so a fictitious filename is used
- to describe where the function comes from. The name is something like:
- "argmap compilation 4".
-
- Parameters
- ----------
- f : callable
- The function to be decorated
-
- Returns
- -------
- func : callable
- The decorated file
-
- """
- sig,wrapped_name,functions,mapblock,finallys,mutable_args=self.assemble(
- f
- )
-
- call=f"{sig.call_sig.format(wrapped_name)}#"
- mut_args=f"{sig.args} = list({sig.args})"ifmutable_argselse""
- body=argmap._indent(sig.def_sig,mut_args,mapblock,call,finallys)
- code="\n".join(body)
-
- locl={}
- globl=dict(functions.values())
- filename=f"{self.__class__} compilation {self._count()}"
- compiled=compile(code,filename,"exec")
- exec(compiled,globl,locl)
- func=locl[sig.name]
- func._code=code
- returnfunc
-
- defassemble(self,f):
-"""Collects components of the source for the decorated function wrapping f.
-
- If `f` has multiple argmap decorators, we recursively assemble the stack of
- decorators into a single flattened function.
-
- This method is part of the `compile` method's process yet separated
- from that method to allow recursive processing. The outputs are
- strings, dictionaries and lists that collect needed info to
- flatten any nested argmap-decoration.
-
- Parameters
- ----------
- f : callable
- The function to be decorated. If f is argmapped, we assemble it.
-
- Returns
- -------
- sig : argmap.Signature
- The function signature as an `argmap.Signature` object.
- wrapped_name : str
- The mangled name used to represent the wrapped function in the code
- being assembled.
- functions : dict
- A dictionary mapping id(g) -> (mangled_name(g), g) for functions g
- referred to in the code being assembled. These need to be present
- in the ``globals`` scope of ``exec`` when defining the decorated
- function.
- mapblock : list of lists and/or strings
- Code that implements mapping of parameters including any try blocks
- if needed. This code will precede the decorated function call.
- finallys : list of lists and/or strings
- Code that implements the finally blocks to post-process the
- arguments (usually close any files if needed) after the
- decorated function is called.
- mutable_args : bool
- True if the decorator needs to modify positional arguments
- via their indices. The compile method then turns the argument
- tuple into a list so that the arguments can be modified.
- """
-
- # first, we check if f is already argmapped -- if that's the case,
- # build up the function recursively.
- # > mapblock is generally a list of function calls of the sort
- # arg = func(arg)
- # in addition to some try-blocks if needed.
- # > finallys is a recursive list of finally blocks of the sort
- # finally:
- # close_func_1()
- # finally:
- # close_func_2()
- # > functions is a dict of functions used in the scope of our decorated
- # function. It will be used to construct globals used in compilation.
- # We make functions[id(f)] = name_of_f, f to ensure that a given
- # function is stored and named exactly once even if called by
- # nested decorators.
- ifhasattr(f,"__argmap__")andf.__self__isf:
- (
- sig,
- wrapped_name,
- functions,
- mapblock,
- finallys,
- mutable_args,
- )=f.__argmap__.assemble(f.__wrapped__)
- functions=dict(functions)# shallow-copy just in case
- else:
- sig=self.signature(f)
- wrapped_name=self._name(f)
- mapblock,finallys=[],[]
- functions={id(f):(wrapped_name,f)}
- mutable_args=False
-
- ifid(self._func)infunctions:
- fname,_=functions[id(self._func)]
- else:
- fname,_=functions[id(self._func)]=self._name(self._func),self._func
-
- # this is a bit complicated -- we can call functions with a variety of
- # nested arguments, so long as their input and output are tuples with
- # the same nested structure. e.g. ("a", "b") maps arguments a and b.
- # A more complicated nesting like (0, (3, 4)) maps arguments 0, 3, 4
- # expecting the mapping to output new values in the same nested shape.
- # while we're not taking full advantage of the ability to handle
- # multiply-nested tuples, it was convenient to implement this in
- # generality because the recursive call to `get_name` is necessary in
- # any case.
- applied=set()
-
- defget_name(arg,first=True):
- nonlocalmutable_args
- ifisinstance(arg,tuple):
- name=", ".join(get_name(x,False)forxinarg)
- returnnameiffirstelsef"({name})"
- ifarginapplied:
- raiseEasyGraphError(f"argument {arg} is specified multiple times")
- applied.add(arg)
- ifarginsig.names:
- returnsig.names[arg]
- elifisinstance(arg,str):
- ifsig.kwargsisNone:
- raiseEasyGraphError(
- f"name {arg} is not a named parameter and this function doesn't"
- " have kwargs"
- )
- returnf"{sig.kwargs}[{arg!r}]"
- else:
- ifsig.argsisNone:
- raiseEasyGraphError(
- f"index {arg} not a parameter index and this function doesn't"
- " have args"
- )
- mutable_args=True
- returnf"{sig.args}[{arg-sig.n_positional}]"
-
- ifself._finally:
- # here's where we handle try_finally decorators. Such a decorator
- # returns a mapped argument and a function to be called in a
- # finally block. This feature was required by the open_file
- # decorator. The below generates the code
- #
- # name, final = func(name) #<--append to mapblock
- # try: #<--append to mapblock
- # ... more argmapping and try blocks
- # return WRAPPED_FUNCTION(...)
- # ... more finally blocks
- # finally: #<--prepend to finallys
- # final() #<--prepend to finallys
- #
- forainself._args:
- name=get_name(a)
- final=self._name(name)
- mapblock.append(f"{name}, {final} = {fname}({name})")
- mapblock.append("try:")
- finallys=["finally:",f"{final}()#","#",finallys]
- else:
- mapblock.extend(
- f"{name} = {fname}({name})"fornameinmap(get_name,self._args)
- )
-
- returnsig,wrapped_name,functions,mapblock,finallys,mutable_args
-
- @classmethod
- defsignature(cls,f):
-r"""Construct a Signature object describing `f`
-
- Compute a Signature so that we can write a function wrapping f with
- the same signature and call-type.
-
- Parameters
- ----------
- f : callable
- A function to be decorated
-
- Returns
- -------
- sig : argmap.Signature
- The Signature of f
-
- Notes
- -----
- The Signature is a namedtuple with names:
-
- name : a unique version of the name of the decorated function
- signature : the inspect.signature of the decorated function
- def_sig : a string used as code to define the new function
- call_sig : a string used as code to call the decorated function
- names : a dict keyed by argument name and index to the argument's name
- n_positional : the number of positional arguments in the signature
- args : the name of the VAR_POSITIONAL argument if any, i.e. \*theseargs
- kwargs : the name of the VAR_KEYWORDS argument if any, i.e. \*\*kwargs
-
- These named attributes of the signature are used in `assemble` and `compile`
- to construct a string of source code for the decorated function.
-
- """
- sig=inspect.signature(f,follow_wrapped=False)
- def_sig=[]
- call_sig=[]
- names={}
-
- kind=None
- args=None
- kwargs=None
- npos=0
- fori,paraminenumerate(sig.parameters.values()):
- # parameters can be position-only, keyword-or-position, keyword-only
- # in any combination, but only in the order as above. we do edge
- # detection to add the appropriate punctuation
- prev=kind
- kind=param.kind
- ifprev==param.POSITIONAL_ONLY!=kind:
- # the last token was position-only, but this one isn't
- def_sig.append("/")
- ifprev!=param.KEYWORD_ONLY==kind!=param.VAR_POSITIONAL:
- # param is the first keyword-only arg and isn't starred
- def_sig.append("*")
-
- # star arguments as appropriate
- ifkind==param.VAR_POSITIONAL:
- name="*"+param.name
- args=param.name
- count=0
- elifkind==param.VAR_KEYWORD:
- name="**"+param.name
- kwargs=param.name
- count=0
- else:
- names[i]=names[param.name]=param.name
- name=param.name
- count=1
-
- # assign to keyword-only args in the function call
- ifkind==param.KEYWORD_ONLY:
- call_sig.append(f"{name} = {name}")
- else:
- npos+=count
- call_sig.append(name)
-
- def_sig.append(name)
-
- fname=cls._name(f)
- def_sig=f'def {fname}({", ".join(def_sig)}):'
-
- ifinspect.isgeneratorfunction(f):
- _return="yield from"
- else:
- _return="return"
-
- call_sig=f"{_return}{{}}({', '.join(call_sig)})"
-
- returncls.Signature(fname,sig,def_sig,call_sig,names,npos,args,kwargs)
-
    # Lightweight record describing a wrapped function's signature; built by
    # `argmap.signature` and consumed by `assemble`/`compile` when generating
    # the source code of the decorated function.
    Signature = collections.namedtuple(
        "Signature",
        [
            "name",
            "signature",
            "def_sig",
            "call_sig",
            "names",
            "n_positional",
            "args",
            "kwargs",
        ],
    )
-
- @staticmethod
- def_flatten(nestlist,visited):
-"""flattens a recursive list of lists that doesn't have cyclic references
-
- Parameters
- ----------
- nestlist : iterable
- A recursive list of objects to be flattened into a single iterable
-
- visited : set
- A set of object ids which have been walked -- initialize with an
- empty set
-
- Yields
- ------
- Non-list objects contained in nestlist
-
- """
- forthinginnestlist:
- ifisinstance(thing,list):
- ifid(thing)invisited:
- raiseValueError("A cycle was found in nestlist. Be a tree.")
- else:
- visited.add(id(thing))
- yield fromargmap._flatten(thing,visited)
- else:
- yieldthing
-
    # Pool of spaces consumed by `_indent`; allows nesting up to 64 levels.
    _tabs = " " * 64
-
- @staticmethod
- def_indent(*lines):
-"""Indent list of code lines to make executable Python code
-
- Indents a tree-recursive list of strings, following the rule that one
- space is added to the tab after a line that ends in a colon, and one is
- removed after a line that ends in an hashmark.
-
- Parameters
- ----------
- *lines : lists and/or strings
- A recursive list of strings to be assembled into properly indented
- code.
-
- Returns
- -------
- code : str
-
- Examples
- --------
-
- argmap._indent(*["try:", "try:", "pass#", "finally:", "pass#", "#",
- "finally:", "pass#"])
-
- renders to
-
- '''try:
- try:
- pass#
- finally:
- pass#
- #
- finally:
- pass#'''
- """
- depth=0
- forlineinargmap._flatten(lines,set()):
- yieldf"{argmap._tabs[:depth]}{line}"
- depth+=(line[-1:]==":")-(line[-1:]=="#")
-
-
-
def nodes_or_number(which_args):
    """Decorator to allow number of nodes or container of nodes.

    With this decorator, the specified argument can be either a number or a container
    of nodes. If it is a number, the nodes used are `range(n)`.
    This allows `eg.complete_graph(50)` in place of `eg.complete_graph(list(range(50)))`.
    And it also allows `eg.complete_graph(any_list_of_nodes)`.

    Parameters
    ----------
    which_args : string or int or sequence of strings or ints
        If string, the name of the argument to be treated.
        If int, the index of the argument to be treated.
        If more than one node argument is allowed, can be a list of locations.

    Returns
    -------
    _nodes_or_numbers : function
        Function which replaces int args with ranges.

    Examples
    --------
    Decorate functions like this::

        @nodes_or_number("nodes")
        def empty_graph(nodes):
            # nodes is converted to a list of nodes

        @nodes_or_number(0)
        def empty_graph(nodes):
            # nodes is converted to a list of nodes

        @nodes_or_number(["m1", "m2"])
        def grid_2d_graph(m1, m2, periodic=False):
            # m1 and m2 are each converted to a list of nodes

        @nodes_or_number([0, 1])
        def grid_2d_graph(m1, m2, periodic=False):
            # m1 and m2 are each converted to a list of nodes

        @nodes_or_number(1)
        def full_rary_tree(r, n):
            # presumably r is a number. It is not handled by this decorator.
            # n is converted to a list of nodes
    """

    def _nodes_or_number(n):
        # If n is a number, materialize range(n); otherwise treat it as a
        # container of nodes.
        try:
            nodes = list(range(n))
        except TypeError:
            nodes = tuple(n)
        else:
            if n < 0:
                # FIX: the message was a plain string, so "{n}" was emitted
                # literally; an f-string interpolates the offending value.
                msg = f"Negative number of nodes not valid: {n}"
                raise EasyGraphError(msg)
        return (n, nodes)

    # Accept a single location or an iterable of locations.
    try:
        iter_wa = iter(which_args)
    except TypeError:
        iter_wa = (which_args,)

    return argmap(_nodes_or_number, *iter_wa)
def download_file(url: str, file_path: Path):
    r"""Download a file from a url.

    Args:
        ``url`` (``str``): the url of the file
        ``file_path`` (``str``): the path to the file
    """
    # make sure the destination directory exists before streaming the body
    file_path.parent.mkdir(parents=True, exist_ok=True)
    response = requests.get(url, stream=True, verify=True)
    if response.status_code != 200:
        raise requests.HTTPError(f"{url} is not accessible.")
    with open(file_path, "wb") as out:
        for block in response.iter_content(chunk_size=1024):
            # skip keep-alive chunks, which arrive empty
            if not block:
                continue
            out.write(block)
-
-
-
def check_file(file_path: Path, md5: str) -> bool:
    r"""Check whether the file at ``file_path`` matches the given md5 digest.

    Args:
        ``file_path`` (``Path``): The local path of the file.
        ``md5`` (``str``): The expected md5 hex digest of the file.

    Returns:
        ``bool``: ``True`` if the file's md5 matches ``md5``, ``False`` otherwise.

    Raises:
        FileNotFoundError: The file does not exist.
    """
    if not file_path.exists():
        raise FileNotFoundError(f"{file_path} does not exist.")
    # Hash in fixed-size chunks so arbitrarily large files do not have to be
    # read into memory at once (the original implementation read the whole
    # file before hashing).
    hasher = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            hasher.update(chunk)
    return hasher.hexdigest() == md5
-
-
def _retry(n: int, exception_type=requests.HTTPError):
    r"""A decorator for retrying a function for n times.

    Args:
        ``n`` (``int``): The number of times to retry.
        ``exception_type``: The exception class that triggers a retry; any
            other exception propagates immediately.
    """

    def decorator(fetcher):
        @wraps(fetcher)
        def wrapper(*args, **kwargs):
            # The first n - 1 attempts swallow `exception_type` with a
            # warning; the final attempt lets any failure propagate.
            for i in range(n - 1):
                try:
                    return fetcher(*args, **kwargs)
                except exception_type as e:
                    warnings.warn(f"Retry downloading({i+1}/{n}): {str(e)}")
            return fetcher(*args, **kwargs)

        return wrapper

    return decorator
-
-
-
@_retry(3)
def download_and_check(url: str, file_path: Path, md5: str):
    r"""Download a file from a url and check its integrity.

    Args:
        ``url`` (``str``): The url of the file.
        ``file_path`` (``Path``): The path to the file.
        ``md5`` (``str``): The md5 of the file.
    """
    # fetch only when the file is not already present locally
    if not file_path.exists():
        download_file(url, file_path)
    if check_file(file_path, md5):
        return True
    # corrupted download: remove it so the retry decorator can re-fetch
    file_path.unlink()
    raise ValueError(
        f"{file_path} is corrupted. We will delete it, and try to download it"
        " again."
    )
class EasyGraphException(Exception):
    """Base class for exceptions in EasyGraph."""
-
-
-
class EasyGraphError(EasyGraphException):
    """Exception for a serious error in EasyGraph"""
-
-
-
class EasyGraphNotImplemented(EasyGraphException):
    """Exception raised by algorithms not implemented for a type of graph."""
-
-
-
class EasyGraphPointlessConcept(EasyGraphException):
    """Raised when a null graph is provided as input to an algorithm
    that cannot use it.

    The null graph is sometimes considered a pointless concept [1]_,
    thus the name of the exception.

    References
    ----------
    .. [1] Harary, F. and Read, R. "Is the Null Graph a Pointless
       Concept?" In Graphs and Combinatorics Conference, George
       Washington University. New York: Springer-Verlag, 1973.

    """
def default_log_formatter() -> logging.Formatter:
    r"""Create a default formatter of log messages for logging."""
    # level and timestamp in brackets, then the message itself
    fmt = "[%(levelname)s%(asctime)s]-> %(message)s"
    return logging.Formatter(fmt)
-
-
-
def simple_stdout2file(file_path: Union[str, Path]) -> None:
    r"""This function simply wraps the ``sys.stdout`` stream, and outputs messages to the ``sys.stdout`` and a specified file, simultaneously.

    Parameters:
        ``file_path`` (``file_path: Union[str, Path]``): The path of the file to output the messages.
    """

    class _TeeLogger:
        # Minimal file-like wrapper: mirrors every write to the original
        # stdout and to the log file, flushing both eagerly.
        def __init__(self, file_path: Path):
            file_path = Path(file_path).absolute()
            assert (
                file_path.parent.exists()
            ), f"The parent directory of {file_path} does not exist."
            self.file_path = file_path
            self.terminal = sys.stdout
            self.file = open(file_path, "a")

        def write(self, message):
            self.terminal.write(message)
            self.file.write(message)
            self.flush()

        def flush(self):
            self.terminal.flush()
            self.file.flush()

    # swap the process-wide stdout for the teeing wrapper
    sys.stdout = _TeeLogger(Path(file_path))
-"""
-Priority queue class with updatable priorities.
-Codes from NetworkX - http://networkx.github.io/
-"""
-
-
-importheapq
-
-
-__all__=["MappedQueue"]
-
-
-
class MappedQueue:
    """
    The MappedQueue class implements an efficient minimum heap. The
    smallest element can be popped in O(1) time, new elements can be pushed
    in O(log n) time, and any element can be removed or updated in O(log n)
    time. The queue cannot contain duplicate elements and an attempt to push an
    element already in the queue will have no effect.

    MappedQueue complements the heapq package from the python standard
    library. While MappedQueue is designed for maximum compatibility with
    heapq, it has slightly different functionality.

    Examples
    --------

    A `MappedQueue` can be created empty or optionally given an array of
    initial elements. Calling `push()` will add an element and calling `pop()`
    will remove and return the smallest element.

    >>> q = MappedQueue([916, 50, 4609, 493, 237])
    >>> q.push(1310)
    True
    >>> x = [q.pop() for i in range(len(q.h))]
    >>> x
    [50, 237, 493, 916, 1310, 4609]

    Elements can also be updated or removed from anywhere in the queue.

    >>> q = MappedQueue([916, 50, 4609, 493, 237])
    >>> q.remove(493)
    >>> q.update(237, 1117)
    >>> x = [q.pop() for i in range(len(q.h))]
    >>> x
    [50, 916, 1117, 4609]

    References
    ----------
    .. [1] Cormen, T. H., Leiserson, C. E., Rivest, R. L., & Stein, C. (2001).
       Introduction to algorithms second edition.
    .. [2] Knuth, D. E. (1997). The art of computer programming (Vol. 3).
       Pearson Education.
    """

    def __init__(self, data=None):
        """Priority queue class with updatable priorities.

        Parameters
        ----------
        data : iterable, optional
            Initial elements of the queue. Defaults to an empty queue.
        """
        # FIX: the former signature used the mutable default ``data=[]``;
        # ``None`` avoids the shared-mutable-default pitfall while keeping
        # call sites unchanged.
        # h is the heap array; d maps each element to its position in h.
        self.h = [] if data is None else list(data)
        self.d = dict()
        self._heapify()

    def __len__(self):
        return len(self.h)

    def _heapify(self):
        """Restore heap invariant and recalculate map."""
        heapq.heapify(self.h)
        self.d = {elt: pos for pos, elt in enumerate(self.h)}
        if len(self.h) != len(self.d):
            raise AssertionError("Heap contains duplicate elements")

    def push(self, elt):
        """Add an element to the queue.

        Returns False (and does nothing) if the element is already present.
        """
        # If element is already in queue, do nothing
        if elt in self.d:
            return False
        # Add element to heap and dict
        pos = len(self.h)
        self.h.append(elt)
        self.d[elt] = pos
        # Restore invariant by sifting down
        self._siftdown(pos)
        return True

    def pop(self):
        """Remove and return the smallest element in the queue."""
        # Remove smallest element
        elt = self.h[0]
        del self.d[elt]
        # If elt is last item, remove and return
        if len(self.h) == 1:
            self.h.pop()
            return elt
        # Replace root with last element
        last = self.h.pop()
        self.h[0] = last
        self.d[last] = 0
        # Restore invariant by sifting up, then down
        pos = self._siftup(0)
        self._siftdown(pos)
        # Return smallest element
        return elt

    def update(self, elt, new):
        """Replace an element in the queue with a new one."""
        # Replace
        pos = self.d[elt]
        self.h[pos] = new
        del self.d[elt]
        self.d[new] = pos
        # Restore invariant by sifting up, then down
        pos = self._siftup(pos)
        self._siftdown(pos)

    def remove(self, elt):
        """Remove an element from the queue.

        Raises KeyError if the element is not in the queue.
        """
        # Find and remove element
        try:
            pos = self.d[elt]
            del self.d[elt]
        except KeyError:
            # Not in queue
            raise
        # If elt is last item, remove and return
        if pos == len(self.h) - 1:
            self.h.pop()
            return
        # Replace elt with last element
        last = self.h.pop()
        self.h[pos] = last
        self.d[last] = pos
        # Restore invariant by sifting up, then down
        pos = self._siftup(pos)
        self._siftdown(pos)

    def _siftup(self, pos):
        """Move element at pos down to a leaf by repeatedly moving the smaller
        child up."""
        h, d = self.h, self.d
        elt = h[pos]
        # Continue until element is in a leaf
        end_pos = len(h)
        left_pos = (pos << 1) + 1
        while left_pos < end_pos:
            # Left child is guaranteed to exist by loop predicate
            left = h[left_pos]
            try:
                right_pos = left_pos + 1
                right = h[right_pos]
                # Out-of-place, swap with left unless right is smaller
                if right < left:
                    h[pos], h[right_pos] = right, elt
                    pos, right_pos = right_pos, pos
                    d[elt], d[right] = pos, right_pos
                else:
                    h[pos], h[left_pos] = left, elt
                    pos, left_pos = left_pos, pos
                    d[elt], d[left] = pos, left_pos
            except IndexError:
                # Left leaf is the end of the heap, swap
                h[pos], h[left_pos] = left, elt
                pos, left_pos = left_pos, pos
                d[elt], d[left] = pos, left_pos
            # Update left_pos
            left_pos = (pos << 1) + 1
        return pos

    def _siftdown(self, pos):
        """Restore invariant by repeatedly replacing out-of-place element with
        its parent."""
        h, d = self.h, self.d
        elt = h[pos]
        # Continue until element is at root
        while pos > 0:
            parent_pos = (pos - 1) >> 1
            parent = h[parent_pos]
            if parent > elt:
                # Swap out-of-place element with parent
                h[parent_pos], h[pos] = elt, parent
                parent_pos, pos = pos, parent_pos
                d[elt] = pos
                d[parent] = parent_pos
            else:
                # Invariant is satisfied
                break
        return pos
def nodes_equal(nodes1, nodes2):
    """Check if nodes are equal.

    Equality here means equal as Python objects.
    Node data must match if included.
    The order of nodes is not relevant.

    Parameters
    ----------
    nodes1, nodes2 : iterables of nodes, or (node, datadict) tuples

    Returns
    -------
    bool
        True if nodes are equal, False otherwise.
    """
    seq1, seq2 = list(nodes1), list(nodes2)
    try:
        # (node, datadict) pairs: compare the node -> data mappings
        lookup1, lookup2 = dict(seq1), dict(seq2)
    except (ValueError, TypeError):
        # plain nodes: compare node membership only (values all None)
        lookup1 = dict.fromkeys(seq1)
        lookup2 = dict.fromkeys(seq2)
    return lookup1 == lookup2
-
-
-
def edges_equal(edges1, edges2, need_data=True):
    """Check if edges are equal.

    Equality here means equal as Python objects.
    Edge data must match if included.
    The order of the edges is not relevant.

    Parameters
    ----------
    edges1, edges2 : iterables of with u, v nodes as
        edge tuples (u, v), or
        edge tuples with data dicts (u, v, d), or
        edge tuples with keys and data dicts (u, v, k, d)
    need_data : bool
        If True (default), everything beyond the two endpoints must also match.

    Returns
    -------
    bool
        True if edges are equal, False otherwise.
    """
    from collections import defaultdict

    edges1 = list(edges1)
    edges2 = list(edges2)
    # FIX: the original compared the *last* enumerate indices, which cannot
    # distinguish an empty iterable from a one-edge iterable (both leave the
    # counter at 0, so edges_equal([], [(1, 2)]) wrongly returned True).
    # Compare real lengths instead.
    if len(edges1) != len(edges2):
        return False

    def _adjacency(edges):
        # Build u -> v -> list-of-data-payloads, symmetric in u and v.
        adj = defaultdict(dict)
        for e in edges:
            u, v = e[0], e[1]
            # keep everything after the endpoints (key and/or data dict)
            data = [e[2:]] if need_data else []
            if v in adj[u]:
                data = adj[u][v] + data
            adj[u][v] = data
            adj[v][u] = data
        return adj

    d1 = _adjacency(edges1)
    d2 = _adjacency(edges2)

    # can check one direction because lengths are the same.
    for n, nbrdict in d1.items():
        for nbr, datalist in nbrdict.items():
            if n not in d2:
                return False
            if nbr not in d2[n]:
                return False
            d2datalist = d2[n][nbr]
            # multiset comparison of the parallel-edge payloads
            for data in datalist:
                if datalist.count(data) != d2datalist.count(data):
                    return False
    return True
def graphs_equal(graph1, graph2):
    """Check if graphs are equal.

    Equality here means equal as Python objects (not isomorphism).
    Node, edge and graph data must match.

    Parameters
    ----------
    graph1, graph2 : graph

    Returns
    -------
    bool
        True if graphs are equal, False otherwise.
    """
    # compare adjacency, node data and graph-level data, short-circuiting
    # in the same order as chained `and` would
    for attr in ("adj", "nodes", "graph"):
        if getattr(graph1, attr) != getattr(graph2, attr):
            return False
    return True
-
-
-# def arbitrary_element(iterable):
-# """Returns an arbitrary element of `iterable` without removing it.
-
-# This is most useful for "peeking" at an arbitrary element of a set,
-# but can be used for any list, dictionary, etc., as well.
-
-# Parameters
-# ----------
-# iterable : `abc.collections.Iterable` instance
-# Any object that implements ``__iter__``, e.g. set, dict, list, tuple,
-# etc.
-
-# Returns
-# -------
-# The object that results from ``next(iter(iterable))``
-
-# Raises
-# ------
-# ValueError
-# If `iterable` is an iterator (because the current implementation of
-# this function would consume an element from the iterator).
-
-# Examples
-# --------
-# Arbitrary elements from common Iterable objects:
-
-# >>> eg.utils.arbitrary_element([1, 2, 3]) # list
-# 1
-# >>> eg.utils.arbitrary_element((1, 2, 3)) # tuple
-# 1
-# >>> eg.utils.arbitrary_element({1, 2, 3}) # set
-# 1
-# >>> d = {k: v for k, v in zip([1, 2, 3], [3, 2, 1])}
-# >>> eg.utils.arbitrary_element(d) # dict_keys
-# 1
-# >>> eg.utils.arbitrary_element(d.values()) # dict values
-# 3
-
-# `str` is also an Iterable:
-
-# >>> eg.utils.arbitrary_element("hello")
-# 'h'
-
-# :exc:`ValueError` is raised if `iterable` is an iterator:
-
-# >>> iterator = iter([1, 2, 3]) # Iterator, *not* Iterable
-# >>> eg.utils.arbitrary_element(iterator)
-# Traceback (most recent call last):
-# ...
-# ValueError: cannot return an arbitrary item from an iterator
-
-# Notes
-# -----
-# This function does not return a *random* element. If `iterable` is
-# ordered, sequential calls will return the same value::
-
-# >>> l = [1, 2, 3]
-# >>> eg.utils.arbitrary_element(l)
-# 1
-# >>> eg.utils.arbitrary_element(l)
-# 1
-
-# """
-# if isinstance(iterable, Iterator):
-# raise ValueError("cannot return an arbitrary item from an iterator")
-# # Another possible implementation is ``for x in iterable: return x``.
-# return next(iter(iterable))
-
def convert_node_labels_to_integers(
    G, first_label=0, ordering="default", label_attribute=None
):
    """Returns a copy of the graph G with the nodes relabeled using
    consecutive integers.

    Parameters
    ----------
    G : graph
        A easygraph graph

    first_label : int, optional (default=0)
        An integer specifying the starting offset in numbering nodes.
        The new integer labels are numbered first_label, ..., n-1+first_label.

    ordering : string
        "default" : inherit node ordering from G.nodes
        "sorted" : inherit node ordering from sorted(G.nodes)
        "increasing degree" : nodes are sorted by increasing degree
        "decreasing degree" : nodes are sorted by decreasing degree

    label_attribute : string, optional (default=None)
        Name of node attribute to store old label. If None no attribute
        is created.

    Notes
    -----
    Node and edge attribute data are copied to the new (relabeled) graph.

    There is no guarantee that the relabeling of nodes to integers will
    give the same two integers for two (even identical graphs).
    Use the `ordering` argument to try to preserve the order.

    See Also
    --------
    relabel_nodes
    """
    N = G.number_of_nodes() + first_label
    new_labels = range(first_label, N)
    if ordering == "default":
        mapping = dict(zip(G.nodes, new_labels))
    elif ordering == "sorted":
        mapping = dict(zip(sorted(G.nodes), new_labels))
    elif ordering in ("increasing degree", "decreasing degree"):
        # sort nodes by degree, lowest first; flip for the decreasing case
        dv_pairs = [(d, n) for (n, d) in G.degree()]
        dv_pairs.sort()
        if ordering == "decreasing degree":
            dv_pairs.reverse()
        mapping = dict(zip([n for d, n in dv_pairs], new_labels))
    else:
        raise eg.EasyGraphError(f"Unknown node ordering: {ordering}")
    H = relabel_nodes(G, mapping)
    # create node attribute with the old label
    if label_attribute is not None:
        eg.set_node_attributes(H, {v: k for k, v in mapping.items()}, label_attribute)
    return H
-__all__=["sparse_dropout"]
-
-
-# if not type checking
-fromtypingimportTYPE_CHECKING
-
-
-ifTYPE_CHECKING:
- importtorch
-
-
-
def sparse_dropout(
    sp_mat: "torch.Tensor", p: float, fill_value: float = 0.0
) -> "torch.Tensor":
    r"""Dropout function for sparse matrix. This function will return a new sparse matrix with the same shape as the input sparse matrix, but with some elements dropped out.

    Args:
        ``sp_mat`` (``torch.Tensor``): The sparse matrix with format ``torch.sparse_coo_tensor``.
        ``p`` (``float``): Probability of an element to be dropped.
        ``fill_value`` (``float``): The fill value for dropped elements. Defaults to ``0.0``.
    """
    # FIX: the docstring used to sit *after* ``import torch``, making it a
    # discarded string expression instead of the function's ``__doc__``.
    # torch is imported lazily so the module does not require it at import
    # time.
    import torch

    device = sp_mat.device
    sp_mat = sp_mat.coalesce()
    assert 0 <= p <= 1
    if p == 0:
        # nothing to drop; return the coalesced input unchanged
        return sp_mat
    drop_probs = torch.ones(sp_mat._nnz(), device=device) * p
    # keep_mask[i] == 1 with probability 1 - p
    keep_mask = torch.bernoulli(1 - drop_probs).to(device)
    # dropped entries are replaced by fill_value rather than removed
    fill_values = torch.logical_not(keep_mask) * fill_value
    new_sp_mat = torch.sparse_coo_tensor(
        sp_mat._indices(),
        sp_mat._values() * keep_mask + fill_values,
        size=sp_mat.size(),
        device=sp_mat.device,
        dtype=sp_mat.dtype,
    )
    return new_sp_mat
def from_pyGraphviz_agraph(A, create_using=None):
    """Returns a EasyGraph Graph or DiGraph from a PyGraphviz graph.

    Parameters
    ----------
    A : PyGraphviz AGraph
        A graph created with PyGraphviz

    create_using : EasyGraph graph constructor, optional (default=None)
        Graph type to create. If graph instance, then cleared before populated.
        If `None`, then the appropriate Graph type is inferred from `A`.

    Examples
    --------
    >>> K5 = eg.complete_graph(5)
    >>> A = eg.to_pyGraphviz_agraph(K5)
    >>> G = eg.from_pyGraphviz_agraph(A)

    Notes
    -----
    The Graph G will have a dictionary G.graph_attr containing
    the default graphviz attributes for graphs, nodes and edges.

    Default node attributes will be in the dictionary G.node_attr
    which is keyed by node.

    Edge attributes will be returned as edge data in G. With
    edge_attr=False the edge data will be the Graphviz edge weight
    attribute or the value 1 if no edge weight attribute is found.

    """
    if create_using is None:
        # infer the graph class from directedness / strictness of A
        if A.is_directed():
            create_using = eg.DiGraph if A.is_strict() else eg.MultiDiGraph
        else:
            create_using = eg.Graph if A.is_strict() else eg.MultiGraph

    # assign defaults
    N = eg.empty_graph(0, create_using)
    if A.name is not None:
        N.name = A.name

    # add graph attributes
    N.graph.update(A.graph_attr)

    # add nodes, attributes to N.node_attr
    for node in A.nodes():
        node_attrs = {str(k): v for k, v in node.attr.items()}
        N.add_node(str(node), **node_attrs)

    # add edges, assign edge data as dictionary of attributes
    for edge in A.edges():
        u, v = str(edge[0]), str(edge[1])
        edge_attrs = {str(k): val for k, val in dict(edge.attr).items()}
        if N.is_multigraph():
            N.add_edge(u, v, key=edge.name, **edge_attrs)
        else:
            # a simple graph stores the graphviz edge name as plain data
            if edge.name is not None:
                edge_attrs["key"] = edge.name
            N.add_edge(u, v, **edge_attrs)

    # add default attributes for graph, nodes, and edges
    # hang them on N.graph_attr
    N.graph["graph"] = dict(A.graph_attr)
    N.graph["node"] = dict(A.node_attr)
    N.graph["edge"] = dict(A.edge_attr)
    return N
-
-
-
def to_pyGraphviz_agraph(N):
    """Returns a pygraphviz graph from a EasyGraph graph N.

    Parameters
    ----------
    N : EasyGraph graph
        A graph created with EasyGraph

    Examples
    --------
    >>> K5 = eg.complete_graph(5)
    >>> A = eg.to_pyGraphviz_agraph(K5)

    Notes
    -----
    If N has an dict N.graph_attr an attempt will be made first
    to copy properties attached to the graph (see from_agraph)
    and then updated with the calling arguments if any.

    """
    try:
        import pygraphviz
    except ImportError as err:
        raise ImportError("requires pygraphviz http://pygraphviz.github.io/") from err

    directed = N.is_directed()
    # a graphviz graph is "strict" when it has no self-loops or parallel edges
    strict = eg.number_of_selfloops(N) == 0 and not N.is_multigraph()
    A = pygraphviz.AGraph(name=N.name, strict=strict, directed=directed)

    # default graph attributes stored by from_pyGraphviz_agraph
    A.graph_attr.update(N.graph.get("graph", {}))
    A.node_attr.update(N.graph.get("node", {}))
    A.edge_attr.update(N.graph.get("edge", {}))

    reserved = ("graph", "node", "edge")
    A.graph_attr.update((k, v) for k, v in N.graph.items() if k not in reserved)

    # add nodes with stringified data
    for node, nodedata in N.nodes(data=True):
        A.add_node(node)
        A.get_node(node).attr.update({k: str(v) for k, v in nodedata.items()})

    # loop over edges
    if N.is_multigraph():
        for u, v, key, edgedata in N.edges(data=True, keys=True):
            str_edgedata = {k: str(v) for k, v in edgedata.items() if k != "key"}
            A.add_edge(u, v, key=str(key))
            # Add edge data
            A.get_edge(u, v).attr.update(str_edgedata)
    else:
        for u, v, edgedata in N.edges(data=True):
            str_edgedata = {k: str(v) for k, v in edgedata.items()}
            A.add_edge(u, v)
            # Add edge data
            A.get_edge(u, v).attr.update(str_edgedata)

    return A